1 // Copyright (c) 2023 Huawei Device Co., Ltd.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 //     http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 mod headers;
15 mod mix;
16 mod multi;
17 mod part;
18 
19 pub(crate) use headers::{DecodeHeaders, EncodeHeaders, HeaderStatus};
20 pub(crate) use mix::MixFrom;
21 pub use multi::{MimeMulti, MimeMultiBuilder, XPart};
22 pub(crate) use part::PartStatus;
23 pub use part::{MimePart, MimePartBuilder};
/// Result whose success value is a `usize` and whose error is a
/// `std::io::Error`.
pub(crate) type SizeResult = Result<usize, std::io::Error>;
/// Result whose success value is a `TokenStatus` (`Complete(usize)` or
/// `Partial(T)`) and whose error is a `std::io::Error`.
pub(crate) type TokenResult<T> = Result<TokenStatus<usize, T>, std::io::Error>;
/// Result whose success value is a `TokenStatus` over borrowed byte slices
/// (`Complete` holds a pair of slices, `Partial` holds a single slice) and
/// whose error is an `HttpError`.
pub(crate) type BytesResult<'a> = Result<TokenStatus<(&'a [u8], &'a [u8]), &'a [u8]>, HttpError>;
27 
28 use core::mem::take;
29 use std::io::Read;
30 
31 use crate::error::{ErrorKind, HttpError};
32 use crate::headers::Headers;
33 
// Byte constants named after the RFC 5234 (ABNF) core rules.
// Horizontal tab (0x09).
pub(crate) const HTAB: u8 = b'\t';
// Space (0x20).
pub(crate) const SP: u8 = b' ';
// Carriage return (0x0D).
pub(crate) const CR: u8 = b'\r';
// Line feed (0x0A).
pub(crate) const LF: u8 = b'\n';
// Carriage return immediately followed by line feed.
pub(crate) const CRLF: &[u8] = b"\r\n";
44 
/// Progress marker for encoding/decoding a single component.
#[derive(Debug, Eq, PartialEq)]
pub enum TokenStatus<T, E> {
    /// The component has only been processed in part; carries the
    /// intermediate value.
    Partial(E),
    /// The component has been processed in full; carries the final value.
    Complete(T),
}

impl<T, E> TokenStatus<T, E> {
    /// Returns `true` if this is `TokenStatus::Complete`, `false` otherwise.
    pub fn is_complete(&self) -> bool {
        matches!(self, Self::Complete(_))
    }

    /// Consumes the status and returns the value held by
    /// `TokenStatus::Complete`, or `None` for `TokenStatus::Partial`.
    pub fn get_complete_once(self) -> Option<T> {
        if let Self::Complete(value) = self {
            Some(value)
        } else {
            None
        }
    }
}
71 
72 // Pulls some bytes from this src into the buf, returning how many bytes were
73 // read.
data_copy(src: &[u8], src_idx: &mut usize, buf: &mut [u8]) -> TokenResult<usize>74 pub(crate) fn data_copy(src: &[u8], src_idx: &mut usize, buf: &mut [u8]) -> TokenResult<usize> {
75     let input_len = src.len() - *src_idx;
76     let output_len = buf.len();
77     // sync
78     let num = (&src[*src_idx..]).read(buf)?;
79     *src_idx += num;
80     if output_len >= input_len {
81         return Ok(TokenStatus::Complete(num));
82     }
83     Ok(TokenStatus::Partial(num))
84 }
85 
86 // removes front *LWSP-char(b' ' or b'\t')
trim_front_lwsp(buf: &[u8]) -> &[u8]87 pub(crate) fn trim_front_lwsp(buf: &[u8]) -> &[u8] {
88     let mut idx = 0;
89     for b in buf.iter() {
90         match *b {
91             SP | HTAB => idx += 1,
92             _ => break,
93         }
94     }
95     &buf[idx..]
96 }
97 
98 // removes back *LWSP-char(b' ' or b'\t')
trim_back_lwsp(buf: &[u8]) -> &[u8]99 fn trim_back_lwsp(buf: &[u8]) -> &[u8] {
100     let mut idx = 0;
101     for b in buf.iter().rev() {
102         match *b {
103             SP | HTAB => idx += 1,
104             _ => break,
105         }
106     }
107     &buf[..buf.len() - idx]
108 }
109 
110 /// buf is not empty, and end with '\n'.
111 /// removes back *LWSP-char(b' ' or b'\t')
trim_back_lwsp_if_end_with_lf(buf: &[u8]) -> &[u8]112 pub(crate) fn trim_back_lwsp_if_end_with_lf(buf: &[u8]) -> &[u8] {
113     let mut temp = &buf[..buf.len() - 1];
114     if temp.ends_with(&[CR]) {
115         temp = &temp[..temp.len() - 1];
116     }
117     trim_back_lwsp(temp)
118 }
119 
120 // reduce "\n" or "\r\n"
consume_crlf( buf: &[u8], cr_meet: bool, ) -> Result<TokenStatus<&[u8], usize>, HttpError>121 pub(crate) fn consume_crlf(
122     buf: &[u8],
123     // has "\r"
124     cr_meet: bool,
125 ) -> Result<TokenStatus<&[u8], usize>, HttpError> {
126     if buf.is_empty() {
127         return Ok(TokenStatus::Partial(0));
128     }
129     match buf[0] {
130         CR => {
131             if cr_meet {
132                 Err(ErrorKind::InvalidInput.into())
133             } else if buf.len() == 1 {
134                 Ok(TokenStatus::Partial(1))
135             } else if buf[1] == LF {
136                 Ok(TokenStatus::Complete(&buf[2..]))
137             } else {
138                 Err(ErrorKind::InvalidInput.into())
139             }
140         }
141         LF => Ok(TokenStatus::Complete(&buf[1..])),
142         _ => Err(ErrorKind::InvalidInput.into()),
143     }
144 }
145 
146 // end with "\n" or "\r\n"
get_crlf_contain(buf: &[u8]) -> TokenStatus<(&[u8], &[u8]), &[u8]>147 pub(crate) fn get_crlf_contain(buf: &[u8]) -> TokenStatus<(&[u8], &[u8]), &[u8]> {
148     for (i, b) in buf.iter().enumerate() {
149         if *b == LF {
150             return TokenStatus::Complete((&buf[..i + 1], &buf[i + 1..]));
151         }
152     }
153     TokenStatus::Partial(buf)
154 }
155 
// TODO: Replace with `[u8]::trim_ascii_start` once the toolchain in use
// provides it as a stable method.
//
// Returns `bytes` with all leading ASCII whitespace (as defined by
// `u8::is_ascii_whitespace`) removed.
fn trim_ascii_start(mut bytes: &[u8]) -> &[u8] {
    // Peel one byte off the front for as long as it is whitespace.
    while let Some((head, tail)) = bytes.split_first() {
        if !head.is_ascii_whitespace() {
            break;
        }
        bytes = tail;
    }
    bytes
}
169 
// TODO: Replace with `[u8]::trim_ascii_end` once the toolchain in use
// provides it as a stable method.
//
// Returns `bytes` with all trailing ASCII whitespace (as defined by
// `u8::is_ascii_whitespace`) removed.
fn trim_ascii_end(mut bytes: &[u8]) -> &[u8] {
    // Peel one byte off the back for as long as it is whitespace.
    while let Some((tail, head)) = bytes.split_last() {
        if !tail.is_ascii_whitespace() {
            break;
        }
        bytes = head;
    }
    bytes
}
183 
184 // TODO: Replace with `[u8]::trim_ascii` when is stable.
trim_ascii(bytes: &[u8]) -> &[u8]185 fn trim_ascii(bytes: &[u8]) -> &[u8] {
186     trim_ascii_end(trim_ascii_start(bytes))
187 }
188 
189 // get multipart boundary
get_content_type_boundary(headers: &Headers) -> Option<Vec<u8>>190 pub(crate) fn get_content_type_boundary(headers: &Headers) -> Option<Vec<u8>> {
191     let header_value = headers.get("Content-Type");
192     if let Some(value) = header_value {
193         let str = value.to_vec();
194         let str = trim_ascii(&str);
195         if !str.starts_with(b"multipart") {
196             return None;
197         }
198 
199         let boundary = str
200             .split(|b| *b == b';')
201             .map(trim_ascii)
202             .find(|s| s.starts_with(b"boundary="));
203 
204         if let Some(boundary) = boundary {
205             let boundary = trim_ascii_start(&boundary[9..]);
206             if boundary.len() > 2 && boundary.starts_with(&[b'"']) && boundary.ends_with(&[b'"']) {
207                 return Some(boundary[1..boundary.len() - 1].to_vec());
208             } else if !boundary.is_empty() {
209                 return Some(boundary[..].to_vec());
210             }
211         }
212     }
213     None
214 }
215 
#[cfg(test)]
mod ut_common {
    use crate::body::mime::common::get_content_type_boundary;
    use crate::headers::Headers;

    /// UT test cases for `get_content_type_boundary`.
    ///
    /// # Brief
    /// 1. For each case, creates a `Headers` and inserts a `Content-Type`
    ///    value.
    /// 2. Gets the boundary from the headers by `get_content_type_boundary`.
    /// 3. Checks whether the result matches the expected boundary.
    #[test]
    fn ut_get_content_type_boundary() {
        // (header value, expected boundary)
        let cases = [
            // common
            (
                "multipart/mixed; boundary=gc0p4Jq0M2Yt08j34c0p",
                "gc0p4Jq0M2Yt08j34c0p",
            ),
            // surrounded by linear whitespace
            (
                "  multipart/mixed; boundary=   gc0p4Jq0M2Yt08j34c0p  ",
                "gc0p4Jq0M2Yt08j34c0p",
            ),
            // wrapped in double quotes
            (
                r#"multipart/mixed; boundary="gc0pJq0M:08jU534c0p""#,
                "gc0pJq0M:08jU534c0p",
            ),
        ];

        for &(value, expected) in cases.iter() {
            let mut headers = Headers::new();
            headers.insert("Content-Type", value).unwrap();
            assert_eq!(
                get_content_type_boundary(&headers),
                Some(expected.as_bytes().to_vec())
            );
        }
    }
}
267