feat(headers): add extended parameter parser to the public API
Move the extended parameter parser from the Content-Disposition header implementation into the common header parsing module. This allows crates that use Hyper to parse RFC 5987-compliant header parameter values. Add tests based on the examples given in the RFC.
This commit is contained in:
		| @@ -8,11 +8,11 @@ | ||||
|  | ||||
| use language_tags::LanguageTag; | ||||
| use std::fmt; | ||||
| use std::str::FromStr; | ||||
| use unicase::UniCase; | ||||
| use url::percent_encoding; | ||||
|  | ||||
| use header::{Header, HeaderFormat, parsing}; | ||||
| use header::parsing::parse_extended_value; | ||||
| use header::shared::Charset; | ||||
|  | ||||
| /// The implied disposition of the content of the HTTP body | ||||
| @@ -133,8 +133,8 @@ impl Header for ContentDisposition { | ||||
|                             Charset::Ext("UTF-8".to_owned()), None, | ||||
|                             val.trim_matches('"').as_bytes().to_owned()) | ||||
|                     } else if UniCase(&*key) == UniCase("filename*") { | ||||
|                         let (charset, opt_language, value) = try!(parse_ext_value(val)); | ||||
|                         DispositionParam::Filename(charset, opt_language, value) | ||||
|                         let extended_value = try!(parse_extended_value(val)); | ||||
|                         DispositionParam::Filename(extended_value.charset, extended_value.language_tag, extended_value.value) | ||||
|                     } else { | ||||
|                         DispositionParam::Ext(key.to_owned(), val.trim_matches('"').to_owned()) | ||||
|                     } | ||||
| @@ -195,68 +195,6 @@ impl fmt::Display for ContentDisposition { | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Parsing of `ext-value` | ||||
| /// https://tools.ietf.org/html/rfc5987#section-3.2 | ||||
| /// | ||||
| /// # ABNF | ||||
| /// ```plain | ||||
| /// ext-value     = charset  "'" [ language ] "'" value-chars | ||||
| ///               ; like RFC 2231's <extended-initial-value> | ||||
| ///               ; (see [RFC2231], Section 7) | ||||
| /// | ||||
| /// charset       = "UTF-8" / "ISO-8859-1" / mime-charset | ||||
| /// | ||||
| /// mime-charset  = 1*mime-charsetc | ||||
| /// mime-charsetc = ALPHA / DIGIT | ||||
| ///               / "!" / "#" / "$" / "%" / "&" | ||||
| ///               / "+" / "-" / "^" / "_" / "`" | ||||
| ///               / "{" / "}" / "~" | ||||
| ///               ; as <mime-charset> in Section 2.3 of [RFC2978] | ||||
| ///               ; except that the single quote is not included | ||||
| ///               ; SHOULD be registered in the IANA charset registry | ||||
| /// | ||||
| /// language      = <Language-Tag, defined in [RFC5646], Section 2.1> | ||||
| /// | ||||
| /// value-chars   = *( pct-encoded / attr-char ) | ||||
| /// | ||||
| /// pct-encoded   = "%" HEXDIG HEXDIG | ||||
| ///               ; see [RFC3986], Section 2.1 | ||||
| /// | ||||
| /// attr-char     = ALPHA / DIGIT | ||||
| ///               / "!" / "#" / "$" / "&" / "+" / "-" / "." | ||||
| ///               / "^" / "_" / "`" / "|" / "~" | ||||
| ///               ; token except ( "*" / "'" / "%" ) | ||||
| /// ``` | ||||
| fn parse_ext_value(val: &str) -> ::Result<(Charset, Option<LanguageTag>, Vec<u8>)> { | ||||
|  | ||||
|     // Break into three pieces separated by the single-quote character | ||||
|     let mut parts = val.splitn(3,'\''); | ||||
|  | ||||
|     // Interpret the first piece as a Charset | ||||
|     let charset: Charset = match parts.next() { | ||||
|         None => return Err(::Error::Header), | ||||
|         Some(n) => try!(FromStr::from_str(n)), | ||||
|     }; | ||||
|  | ||||
|     // Interpret the second piece as a language tag | ||||
|     let lang: Option<LanguageTag> = match parts.next() { | ||||
|         None => return Err(::Error::Header), | ||||
|         Some("") => None, | ||||
|         Some(s) => match s.parse() { | ||||
|             Ok(lt) => Some(lt), | ||||
|             Err(_) => return Err(::Error::Header), | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     // Interpret the third piece as a sequence of value characters | ||||
|     let value: Vec<u8> = match parts.next() { | ||||
|         None => return Err(::Error::Header), | ||||
|         Some(v) => percent_encoding::percent_decode(v.as_bytes()), | ||||
|     }; | ||||
|  | ||||
|     Ok( (charset, lang, value) ) | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use super::{ContentDisposition,DispositionType,DispositionParam}; | ||||
|   | ||||
| @@ -1,7 +1,12 @@ | ||||
| //! Utility functions for Header implementations. | ||||
|  | ||||
| use language_tags::LanguageTag; | ||||
| use std::str; | ||||
| use std::str::FromStr; | ||||
| use std::fmt::{self, Display}; | ||||
| use url::percent_encoding; | ||||
|  | ||||
| use header::shared::Charset; | ||||
|  | ||||
| /// Reads a single raw string when parsing a header. | ||||
| pub fn from_one_raw_str<T: str::FromStr>(raw: &[Vec<u8>]) -> ::Result<T> { | ||||
| @@ -48,3 +53,131 @@ pub fn fmt_comma_delimited<T: Display>(f: &mut fmt::Formatter, parts: &[T]) -> f | ||||
|     } | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| /// An extended header parameter value (i.e., tagged with a character set and optionally, | ||||
| /// a language), as defined in [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2). | ||||
| pub struct ExtendedValue { | ||||
|     pub charset: Charset, | ||||
|     pub language_tag: Option<LanguageTag>, | ||||
|     pub value: Vec<u8>, | ||||
| } | ||||
|  | ||||
| /// Parses extended header parameter values (`ext-value`), as defined in | ||||
| /// [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2). | ||||
| /// | ||||
| /// Extended values are denoted by parameter names that end with `*`. | ||||
| /// | ||||
| /// ## ABNF | ||||
| /// ```plain | ||||
| /// ext-value     = charset  "'" [ language ] "'" value-chars | ||||
| ///               ; like RFC 2231's <extended-initial-value> | ||||
| ///               ; (see [RFC2231], Section 7) | ||||
| /// | ||||
| /// charset       = "UTF-8" / "ISO-8859-1" / mime-charset | ||||
| /// | ||||
| /// mime-charset  = 1*mime-charsetc | ||||
| /// mime-charsetc = ALPHA / DIGIT | ||||
| ///               / "!" / "#" / "$" / "%" / "&" | ||||
| ///               / "+" / "-" / "^" / "_" / "`" | ||||
| ///               / "{" / "}" / "~" | ||||
| ///               ; as <mime-charset> in Section 2.3 of [RFC2978] | ||||
| ///               ; except that the single quote is not included | ||||
| ///               ; SHOULD be registered in the IANA charset registry | ||||
| /// | ||||
| /// language      = <Language-Tag, defined in [RFC5646], Section 2.1> | ||||
| /// | ||||
| /// value-chars   = *( pct-encoded / attr-char ) | ||||
| /// | ||||
| /// pct-encoded   = "%" HEXDIG HEXDIG | ||||
| ///               ; see [RFC3986], Section 2.1 | ||||
| /// | ||||
| /// attr-char     = ALPHA / DIGIT | ||||
| ///               / "!" / "#" / "$" / "&" / "+" / "-" / "." | ||||
| ///               / "^" / "_" / "`" / "|" / "~" | ||||
| ///               ; token except ( "*" / "'" / "%" ) | ||||
| /// ``` | ||||
| pub fn parse_extended_value(val: &str) -> ::Result<ExtendedValue> { | ||||
|  | ||||
|     // Break into three pieces separated by the single-quote character | ||||
|     let mut parts = val.splitn(3,'\''); | ||||
|  | ||||
|     // Interpret the first piece as a Charset | ||||
|     let charset: Charset = match parts.next() { | ||||
|         None => return Err(::Error::Header), | ||||
|         Some(n) => try!(FromStr::from_str(n)), | ||||
|     }; | ||||
|  | ||||
|     // Interpret the second piece as a language tag | ||||
|     let lang: Option<LanguageTag> = match parts.next() { | ||||
|         None => return Err(::Error::Header), | ||||
|         Some("") => None, | ||||
|         Some(s) => match s.parse() { | ||||
|             Ok(lt) => Some(lt), | ||||
|             Err(_) => return Err(::Error::Header), | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     // Interpret the third piece as a sequence of value characters | ||||
|     let value: Vec<u8> = match parts.next() { | ||||
|         None => return Err(::Error::Header), | ||||
|         Some(v) => percent_encoding::percent_decode(v.as_bytes()), | ||||
|     }; | ||||
|  | ||||
|     Ok(ExtendedValue { | ||||
|         charset: charset, | ||||
|         language_tag: lang, | ||||
|         value: value, | ||||
|     }) | ||||
| } | ||||
|  | ||||
#[cfg(test)]
mod tests {
    use header::shared::Charset;
    use super::parse_extended_value;

    /// RFC 5987, Section 3.2.2: extended notation with an explicit language,
    /// using the Unicode character U+00A3 (POUND SIGN) encoded as ISO-8859-1.
    #[test]
    fn test_parse_extended_value_with_encoding_and_language_tag() {
        let parsed = parse_extended_value("iso-8859-1'en'%A3%20rates").unwrap();

        assert_eq!(Charset::Iso_8859_1, parsed.charset);
        assert_eq!(Some(langtag!(en)), parsed.language_tag);
        // 0xA3 is the single ISO-8859-1 octet for the pound sign.
        assert_eq!(b"\xA3 rates".to_vec(), parsed.value);
    }

    /// RFC 5987, Section 3.2.2: extended notation without a language, using
    /// the Unicode characters U+00A3 (POUND SIGN) and U+20AC (EURO SIGN)
    /// encoded as UTF-8.
    #[test]
    fn test_parse_extended_value_with_encoding() {
        let parsed = parse_extended_value("UTF-8''%c2%a3%20and%20%e2%82%ac%20rates").unwrap();

        assert_eq!(Charset::Ext("UTF-8".to_string()), parsed.charset);
        assert!(parsed.language_tag.is_none());
        // C2 A3 is the pound sign and E2 82 AC the euro sign in UTF-8.
        assert_eq!(b"\xC2\xA3 and \xE2\x82\xAC rates".to_vec(), parsed.value);
    }

    /// A value without any single quotes has neither charset nor language.
    /// From: https://greenbytes.de/tech/tc2231/#attwithfn2231quot2
    #[test]
    fn test_parse_extended_value_missing_language_tag_and_encoding() {
        assert!(parse_extended_value("foo%20bar.html").is_err());
    }

    /// Only one single quote: the third part is missing.
    #[test]
    fn test_parse_extended_value_partially_formatted() {
        assert!(parse_extended_value("UTF-8'missing third part").is_err());
    }

    /// Only one single quote, with nothing after it.
    #[test]
    fn test_parse_extended_value_partially_formatted_blank() {
        assert!(parse_extended_value("blank second part'").is_err());
    }
}
|   | ||||
		Reference in New Issue
	
	Block a user