Merge pull request #703 from malept/header-parsing-with-character-encoding
feat(headers): add extended parameter parser to the public API
This commit is contained in:
		| @@ -8,11 +8,11 @@ | |||||||
|  |  | ||||||
| use language_tags::LanguageTag; | use language_tags::LanguageTag; | ||||||
| use std::fmt; | use std::fmt; | ||||||
| use std::str::FromStr; |  | ||||||
| use unicase::UniCase; | use unicase::UniCase; | ||||||
| use url::percent_encoding; | use url::percent_encoding; | ||||||
|  |  | ||||||
| use header::{Header, HeaderFormat, parsing}; | use header::{Header, HeaderFormat, parsing}; | ||||||
|  | use header::parsing::parse_extended_value; | ||||||
| use header::shared::Charset; | use header::shared::Charset; | ||||||
|  |  | ||||||
| /// The implied disposition of the content of the HTTP body | /// The implied disposition of the content of the HTTP body | ||||||
| @@ -133,8 +133,8 @@ impl Header for ContentDisposition { | |||||||
|                             Charset::Ext("UTF-8".to_owned()), None, |                             Charset::Ext("UTF-8".to_owned()), None, | ||||||
|                             val.trim_matches('"').as_bytes().to_owned()) |                             val.trim_matches('"').as_bytes().to_owned()) | ||||||
|                     } else if UniCase(&*key) == UniCase("filename*") { |                     } else if UniCase(&*key) == UniCase("filename*") { | ||||||
|                         let (charset, opt_language, value) = try!(parse_ext_value(val)); |                         let extended_value = try!(parse_extended_value(val)); | ||||||
|                         DispositionParam::Filename(charset, opt_language, value) |                         DispositionParam::Filename(extended_value.charset, extended_value.language_tag, extended_value.value) | ||||||
|                     } else { |                     } else { | ||||||
|                         DispositionParam::Ext(key.to_owned(), val.trim_matches('"').to_owned()) |                         DispositionParam::Ext(key.to_owned(), val.trim_matches('"').to_owned()) | ||||||
|                     } |                     } | ||||||
| @@ -195,68 +195,6 @@ impl fmt::Display for ContentDisposition { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Parsing of `ext-value` |  | ||||||
| /// https://tools.ietf.org/html/rfc5987#section-3.2 |  | ||||||
| /// |  | ||||||
| /// # ABNF |  | ||||||
| /// ```plain |  | ||||||
| /// ext-value     = charset  "'" [ language ] "'" value-chars |  | ||||||
| ///               ; like RFC 2231's <extended-initial-value> |  | ||||||
| ///               ; (see [RFC2231], Section 7) |  | ||||||
| /// |  | ||||||
| /// charset       = "UTF-8" / "ISO-8859-1" / mime-charset |  | ||||||
| /// |  | ||||||
| /// mime-charset  = 1*mime-charsetc |  | ||||||
| /// mime-charsetc = ALPHA / DIGIT |  | ||||||
| ///               / "!" / "#" / "$" / "%" / "&" |  | ||||||
| ///               / "+" / "-" / "^" / "_" / "`" |  | ||||||
| ///               / "{" / "}" / "~" |  | ||||||
| ///               ; as <mime-charset> in Section 2.3 of [RFC2978] |  | ||||||
| ///               ; except that the single quote is not included |  | ||||||
| ///               ; SHOULD be registered in the IANA charset registry |  | ||||||
| /// |  | ||||||
| /// language      = <Language-Tag, defined in [RFC5646], Section 2.1> |  | ||||||
| /// |  | ||||||
| /// value-chars   = *( pct-encoded / attr-char ) |  | ||||||
| /// |  | ||||||
| /// pct-encoded   = "%" HEXDIG HEXDIG |  | ||||||
| ///               ; see [RFC3986], Section 2.1 |  | ||||||
| /// |  | ||||||
| /// attr-char     = ALPHA / DIGIT |  | ||||||
| ///               / "!" / "#" / "$" / "&" / "+" / "-" / "." |  | ||||||
| ///               / "^" / "_" / "`" / "|" / "~" |  | ||||||
| ///               ; token except ( "*" / "'" / "%" ) |  | ||||||
| /// ``` |  | ||||||
| fn parse_ext_value(val: &str) -> ::Result<(Charset, Option<LanguageTag>, Vec<u8>)> { |  | ||||||
|  |  | ||||||
|     // Break into three pieces separated by the single-quote character |  | ||||||
|     let mut parts = val.splitn(3,'\''); |  | ||||||
|  |  | ||||||
|     // Interpret the first piece as a Charset |  | ||||||
|     let charset: Charset = match parts.next() { |  | ||||||
|         None => return Err(::Error::Header), |  | ||||||
|         Some(n) => try!(FromStr::from_str(n)), |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     // Interpret the second piece as a language tag |  | ||||||
|     let lang: Option<LanguageTag> = match parts.next() { |  | ||||||
|         None => return Err(::Error::Header), |  | ||||||
|         Some("") => None, |  | ||||||
|         Some(s) => match s.parse() { |  | ||||||
|             Ok(lt) => Some(lt), |  | ||||||
|             Err(_) => return Err(::Error::Header), |  | ||||||
|         } |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     // Interpret the third piece as a sequence of value characters |  | ||||||
|     let value: Vec<u8> = match parts.next() { |  | ||||||
|         None => return Err(::Error::Header), |  | ||||||
|         Some(v) => percent_encoding::percent_decode(v.as_bytes()), |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     Ok( (charset, lang, value) ) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| #[cfg(test)] | #[cfg(test)] | ||||||
| mod tests { | mod tests { | ||||||
|     use super::{ContentDisposition,DispositionType,DispositionParam}; |     use super::{ContentDisposition,DispositionType,DispositionParam}; | ||||||
|   | |||||||
| @@ -1,7 +1,12 @@ | |||||||
| //! Utility functions for Header implementations. | //! Utility functions for Header implementations. | ||||||
|  |  | ||||||
|  | use language_tags::LanguageTag; | ||||||
| use std::str; | use std::str; | ||||||
|  | use std::str::FromStr; | ||||||
| use std::fmt::{self, Display}; | use std::fmt::{self, Display}; | ||||||
|  | use url::percent_encoding; | ||||||
|  |  | ||||||
|  | use header::shared::Charset; | ||||||
|  |  | ||||||
| /// Reads a single raw string when parsing a header. | /// Reads a single raw string when parsing a header. | ||||||
| pub fn from_one_raw_str<T: str::FromStr>(raw: &[Vec<u8>]) -> ::Result<T> { | pub fn from_one_raw_str<T: str::FromStr>(raw: &[Vec<u8>]) -> ::Result<T> { | ||||||
| @@ -48,3 +53,131 @@ pub fn fmt_comma_delimited<T: Display>(f: &mut fmt::Formatter, parts: &[T]) -> f | |||||||
|     } |     } | ||||||
|     Ok(()) |     Ok(()) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// An extended header parameter value (i.e., tagged with a character set and optionally, | ||||||
|  | /// a language), as defined in [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2). | ||||||
|  | pub struct ExtendedValue { | ||||||
|  |     pub charset: Charset, | ||||||
|  |     pub language_tag: Option<LanguageTag>, | ||||||
|  |     pub value: Vec<u8>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Parses extended header parameter values (`ext-value`), as defined in | ||||||
|  | /// [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2). | ||||||
|  | /// | ||||||
|  | /// Extended values are denoted by parameter names that end with `*`. | ||||||
|  | /// | ||||||
|  | /// ## ABNF | ||||||
|  | /// ```plain | ||||||
|  | /// ext-value     = charset  "'" [ language ] "'" value-chars | ||||||
|  | ///               ; like RFC 2231's <extended-initial-value> | ||||||
|  | ///               ; (see [RFC2231], Section 7) | ||||||
|  | /// | ||||||
|  | /// charset       = "UTF-8" / "ISO-8859-1" / mime-charset | ||||||
|  | /// | ||||||
|  | /// mime-charset  = 1*mime-charsetc | ||||||
|  | /// mime-charsetc = ALPHA / DIGIT | ||||||
|  | ///               / "!" / "#" / "$" / "%" / "&" | ||||||
|  | ///               / "+" / "-" / "^" / "_" / "`" | ||||||
|  | ///               / "{" / "}" / "~" | ||||||
|  | ///               ; as <mime-charset> in Section 2.3 of [RFC2978] | ||||||
|  | ///               ; except that the single quote is not included | ||||||
|  | ///               ; SHOULD be registered in the IANA charset registry | ||||||
|  | /// | ||||||
|  | /// language      = <Language-Tag, defined in [RFC5646], Section 2.1> | ||||||
|  | /// | ||||||
|  | /// value-chars   = *( pct-encoded / attr-char ) | ||||||
|  | /// | ||||||
|  | /// pct-encoded   = "%" HEXDIG HEXDIG | ||||||
|  | ///               ; see [RFC3986], Section 2.1 | ||||||
|  | /// | ||||||
|  | /// attr-char     = ALPHA / DIGIT | ||||||
|  | ///               / "!" / "#" / "$" / "&" / "+" / "-" / "." | ||||||
|  | ///               / "^" / "_" / "`" / "|" / "~" | ||||||
|  | ///               ; token except ( "*" / "'" / "%" ) | ||||||
|  | /// ``` | ||||||
|  | pub fn parse_extended_value(val: &str) -> ::Result<ExtendedValue> { | ||||||
|  |  | ||||||
|  |     // Break into three pieces separated by the single-quote character | ||||||
|  |     let mut parts = val.splitn(3,'\''); | ||||||
|  |  | ||||||
|  |     // Interpret the first piece as a Charset | ||||||
|  |     let charset: Charset = match parts.next() { | ||||||
|  |         None => return Err(::Error::Header), | ||||||
|  |         Some(n) => try!(FromStr::from_str(n)), | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     // Interpret the second piece as a language tag | ||||||
|  |     let lang: Option<LanguageTag> = match parts.next() { | ||||||
|  |         None => return Err(::Error::Header), | ||||||
|  |         Some("") => None, | ||||||
|  |         Some(s) => match s.parse() { | ||||||
|  |             Ok(lt) => Some(lt), | ||||||
|  |             Err(_) => return Err(::Error::Header), | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     // Interpret the third piece as a sequence of value characters | ||||||
|  |     let value: Vec<u8> = match parts.next() { | ||||||
|  |         None => return Err(::Error::Header), | ||||||
|  |         Some(v) => percent_encoding::percent_decode(v.as_bytes()), | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     Ok(ExtendedValue { | ||||||
|  |         charset: charset, | ||||||
|  |         language_tag: lang, | ||||||
|  |         value: value, | ||||||
|  |     }) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[cfg(test)] | ||||||
|  | mod tests { | ||||||
|  |     use header::shared::Charset; | ||||||
|  |     use super::parse_extended_value; | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn test_parse_extended_value_with_encoding_and_language_tag() { | ||||||
|  |         let expected_language_tag = langtag!(en); | ||||||
|  |         // RFC 5987, Section 3.2.2 | ||||||
|  |         // Extended notation, using the Unicode character U+00A3 (POUND SIGN) | ||||||
|  |         let result = parse_extended_value("iso-8859-1'en'%A3%20rates"); | ||||||
|  |         assert!(result.is_ok()); | ||||||
|  |         let extended_value = result.unwrap(); | ||||||
|  |         assert_eq!(Charset::Iso_8859_1, extended_value.charset); | ||||||
|  |         assert!(extended_value.language_tag.is_some()); | ||||||
|  |         assert_eq!(expected_language_tag, extended_value.language_tag.unwrap()); | ||||||
|  |         assert_eq!(vec![163, b' ', b'r', b'a', b't', b'e', b's'], extended_value.value); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn test_parse_extended_value_with_encoding() { | ||||||
|  |         // RFC 5987, Section 3.2.2 | ||||||
|  |         // Extended notation, using the Unicode characters U+00A3 (POUND SIGN) | ||||||
|  |         // and U+20AC (EURO SIGN) | ||||||
|  |         let result = parse_extended_value("UTF-8''%c2%a3%20and%20%e2%82%ac%20rates"); | ||||||
|  |         assert!(result.is_ok()); | ||||||
|  |         let extended_value = result.unwrap(); | ||||||
|  |         assert_eq!(Charset::Ext("UTF-8".to_string()), extended_value.charset); | ||||||
|  |         assert!(extended_value.language_tag.is_none()); | ||||||
|  |         assert_eq!(vec![194, 163, b' ', b'a', b'n', b'd', b' ', 226, 130, 172, b' ', b'r', b'a', b't', b'e', b's'], extended_value.value); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn test_parse_extended_value_missing_language_tag_and_encoding() { | ||||||
|  |         // From: https://greenbytes.de/tech/tc2231/#attwithfn2231quot2 | ||||||
|  |         let result = parse_extended_value("foo%20bar.html"); | ||||||
|  |         assert!(result.is_err()); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn test_parse_extended_value_partially_formatted() { | ||||||
|  |         let result = parse_extended_value("UTF-8'missing third part"); | ||||||
|  |         assert!(result.is_err()); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn test_parse_extended_value_partially_formatted_blank() { | ||||||
|  |         let result = parse_extended_value("blank second part'"); | ||||||
|  |         assert!(result.is_err()); | ||||||
|  |     } | ||||||
|  | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user