Merge pull request #703 from malept/header-parsing-with-character-encoding
feat(headers): add extended parameter parser to the public API
This commit is contained in:
@@ -8,11 +8,11 @@
|
|||||||
|
|
||||||
use language_tags::LanguageTag;
|
use language_tags::LanguageTag;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::str::FromStr;
|
|
||||||
use unicase::UniCase;
|
use unicase::UniCase;
|
||||||
use url::percent_encoding;
|
use url::percent_encoding;
|
||||||
|
|
||||||
use header::{Header, HeaderFormat, parsing};
|
use header::{Header, HeaderFormat, parsing};
|
||||||
|
use header::parsing::parse_extended_value;
|
||||||
use header::shared::Charset;
|
use header::shared::Charset;
|
||||||
|
|
||||||
/// The implied disposition of the content of the HTTP body
|
/// The implied disposition of the content of the HTTP body
|
||||||
@@ -133,8 +133,8 @@ impl Header for ContentDisposition {
|
|||||||
Charset::Ext("UTF-8".to_owned()), None,
|
Charset::Ext("UTF-8".to_owned()), None,
|
||||||
val.trim_matches('"').as_bytes().to_owned())
|
val.trim_matches('"').as_bytes().to_owned())
|
||||||
} else if UniCase(&*key) == UniCase("filename*") {
|
} else if UniCase(&*key) == UniCase("filename*") {
|
||||||
let (charset, opt_language, value) = try!(parse_ext_value(val));
|
let extended_value = try!(parse_extended_value(val));
|
||||||
DispositionParam::Filename(charset, opt_language, value)
|
DispositionParam::Filename(extended_value.charset, extended_value.language_tag, extended_value.value)
|
||||||
} else {
|
} else {
|
||||||
DispositionParam::Ext(key.to_owned(), val.trim_matches('"').to_owned())
|
DispositionParam::Ext(key.to_owned(), val.trim_matches('"').to_owned())
|
||||||
}
|
}
|
||||||
@@ -195,68 +195,6 @@ impl fmt::Display for ContentDisposition {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parsing of `ext-value`
|
|
||||||
/// https://tools.ietf.org/html/rfc5987#section-3.2
|
|
||||||
///
|
|
||||||
/// # ABNF
|
|
||||||
/// ```plain
|
|
||||||
/// ext-value = charset "'" [ language ] "'" value-chars
|
|
||||||
/// ; like RFC 2231's <extended-initial-value>
|
|
||||||
/// ; (see [RFC2231], Section 7)
|
|
||||||
///
|
|
||||||
/// charset = "UTF-8" / "ISO-8859-1" / mime-charset
|
|
||||||
///
|
|
||||||
/// mime-charset = 1*mime-charsetc
|
|
||||||
/// mime-charsetc = ALPHA / DIGIT
|
|
||||||
/// / "!" / "#" / "$" / "%" / "&"
|
|
||||||
/// / "+" / "-" / "^" / "_" / "`"
|
|
||||||
/// / "{" / "}" / "~"
|
|
||||||
/// ; as <mime-charset> in Section 2.3 of [RFC2978]
|
|
||||||
/// ; except that the single quote is not included
|
|
||||||
/// ; SHOULD be registered in the IANA charset registry
|
|
||||||
///
|
|
||||||
/// language = <Language-Tag, defined in [RFC5646], Section 2.1>
|
|
||||||
///
|
|
||||||
/// value-chars = *( pct-encoded / attr-char )
|
|
||||||
///
|
|
||||||
/// pct-encoded = "%" HEXDIG HEXDIG
|
|
||||||
/// ; see [RFC3986], Section 2.1
|
|
||||||
///
|
|
||||||
/// attr-char = ALPHA / DIGIT
|
|
||||||
/// / "!" / "#" / "$" / "&" / "+" / "-" / "."
|
|
||||||
/// / "^" / "_" / "`" / "|" / "~"
|
|
||||||
/// ; token except ( "*" / "'" / "%" )
|
|
||||||
/// ```
|
|
||||||
fn parse_ext_value(val: &str) -> ::Result<(Charset, Option<LanguageTag>, Vec<u8>)> {
|
|
||||||
|
|
||||||
// Break into three pieces separated by the single-quote character
|
|
||||||
let mut parts = val.splitn(3,'\'');
|
|
||||||
|
|
||||||
// Interpret the first piece as a Charset
|
|
||||||
let charset: Charset = match parts.next() {
|
|
||||||
None => return Err(::Error::Header),
|
|
||||||
Some(n) => try!(FromStr::from_str(n)),
|
|
||||||
};
|
|
||||||
|
|
||||||
// Interpret the second piece as a language tag
|
|
||||||
let lang: Option<LanguageTag> = match parts.next() {
|
|
||||||
None => return Err(::Error::Header),
|
|
||||||
Some("") => None,
|
|
||||||
Some(s) => match s.parse() {
|
|
||||||
Ok(lt) => Some(lt),
|
|
||||||
Err(_) => return Err(::Error::Header),
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Interpret the third piece as a sequence of value characters
|
|
||||||
let value: Vec<u8> = match parts.next() {
|
|
||||||
None => return Err(::Error::Header),
|
|
||||||
Some(v) => percent_encoding::percent_decode(v.as_bytes()),
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok( (charset, lang, value) )
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::{ContentDisposition,DispositionType,DispositionParam};
|
use super::{ContentDisposition,DispositionType,DispositionParam};
|
||||||
|
|||||||
@@ -1,7 +1,12 @@
|
|||||||
//! Utility functions for Header implementations.
|
//! Utility functions for Header implementations.
|
||||||
|
|
||||||
|
use language_tags::LanguageTag;
|
||||||
use std::str;
|
use std::str;
|
||||||
|
use std::str::FromStr;
|
||||||
use std::fmt::{self, Display};
|
use std::fmt::{self, Display};
|
||||||
|
use url::percent_encoding;
|
||||||
|
|
||||||
|
use header::shared::Charset;
|
||||||
|
|
||||||
/// Reads a single raw string when parsing a header.
|
/// Reads a single raw string when parsing a header.
|
||||||
pub fn from_one_raw_str<T: str::FromStr>(raw: &[Vec<u8>]) -> ::Result<T> {
|
pub fn from_one_raw_str<T: str::FromStr>(raw: &[Vec<u8>]) -> ::Result<T> {
|
||||||
@@ -48,3 +53,131 @@ pub fn fmt_comma_delimited<T: Display>(f: &mut fmt::Formatter, parts: &[T]) -> f
|
|||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An extended header parameter value (i.e., tagged with a character set and optionally,
|
||||||
|
/// a language), as defined in [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2).
|
||||||
|
pub struct ExtendedValue {
|
||||||
|
pub charset: Charset,
|
||||||
|
pub language_tag: Option<LanguageTag>,
|
||||||
|
pub value: Vec<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses extended header parameter values (`ext-value`), as defined in
|
||||||
|
/// [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2).
|
||||||
|
///
|
||||||
|
/// Extended values are denoted by parameter names that end with `*`.
|
||||||
|
///
|
||||||
|
/// ## ABNF
|
||||||
|
/// ```plain
|
||||||
|
/// ext-value = charset "'" [ language ] "'" value-chars
|
||||||
|
/// ; like RFC 2231's <extended-initial-value>
|
||||||
|
/// ; (see [RFC2231], Section 7)
|
||||||
|
///
|
||||||
|
/// charset = "UTF-8" / "ISO-8859-1" / mime-charset
|
||||||
|
///
|
||||||
|
/// mime-charset = 1*mime-charsetc
|
||||||
|
/// mime-charsetc = ALPHA / DIGIT
|
||||||
|
/// / "!" / "#" / "$" / "%" / "&"
|
||||||
|
/// / "+" / "-" / "^" / "_" / "`"
|
||||||
|
/// / "{" / "}" / "~"
|
||||||
|
/// ; as <mime-charset> in Section 2.3 of [RFC2978]
|
||||||
|
/// ; except that the single quote is not included
|
||||||
|
/// ; SHOULD be registered in the IANA charset registry
|
||||||
|
///
|
||||||
|
/// language = <Language-Tag, defined in [RFC5646], Section 2.1>
|
||||||
|
///
|
||||||
|
/// value-chars = *( pct-encoded / attr-char )
|
||||||
|
///
|
||||||
|
/// pct-encoded = "%" HEXDIG HEXDIG
|
||||||
|
/// ; see [RFC3986], Section 2.1
|
||||||
|
///
|
||||||
|
/// attr-char = ALPHA / DIGIT
|
||||||
|
/// / "!" / "#" / "$" / "&" / "+" / "-" / "."
|
||||||
|
/// / "^" / "_" / "`" / "|" / "~"
|
||||||
|
/// ; token except ( "*" / "'" / "%" )
|
||||||
|
/// ```
|
||||||
|
pub fn parse_extended_value(val: &str) -> ::Result<ExtendedValue> {
|
||||||
|
|
||||||
|
// Break into three pieces separated by the single-quote character
|
||||||
|
let mut parts = val.splitn(3,'\'');
|
||||||
|
|
||||||
|
// Interpret the first piece as a Charset
|
||||||
|
let charset: Charset = match parts.next() {
|
||||||
|
None => return Err(::Error::Header),
|
||||||
|
Some(n) => try!(FromStr::from_str(n)),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Interpret the second piece as a language tag
|
||||||
|
let lang: Option<LanguageTag> = match parts.next() {
|
||||||
|
None => return Err(::Error::Header),
|
||||||
|
Some("") => None,
|
||||||
|
Some(s) => match s.parse() {
|
||||||
|
Ok(lt) => Some(lt),
|
||||||
|
Err(_) => return Err(::Error::Header),
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Interpret the third piece as a sequence of value characters
|
||||||
|
let value: Vec<u8> = match parts.next() {
|
||||||
|
None => return Err(::Error::Header),
|
||||||
|
Some(v) => percent_encoding::percent_decode(v.as_bytes()),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(ExtendedValue {
|
||||||
|
charset: charset,
|
||||||
|
language_tag: lang,
|
||||||
|
value: value,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use header::shared::Charset;
    use super::parse_extended_value;

    #[test]
    fn test_parse_extended_value_with_encoding_and_language_tag() {
        // Example from RFC 5987, Section 3.2.2: extended notation carrying
        // U+00A3 (POUND SIGN) as the single percent-encoded octet 0xA3.
        let parsed = parse_extended_value("iso-8859-1'en'%A3%20rates");
        assert!(parsed.is_ok());

        let ext = parsed.unwrap();
        assert_eq!(Charset::Iso_8859_1, ext.charset);
        assert!(ext.language_tag.is_some());
        assert_eq!(langtag!(en), ext.language_tag.unwrap());
        // The value is the decoded octets, still in the declared charset.
        assert_eq!(b"\xA3 rates".to_vec(), ext.value);
    }

    #[test]
    fn test_parse_extended_value_with_encoding() {
        // Example from RFC 5987, Section 3.2.2: extended notation carrying
        // U+00A3 (POUND SIGN) and U+20AC (EURO SIGN), with no language tag.
        let parsed = parse_extended_value("UTF-8''%c2%a3%20and%20%e2%82%ac%20rates");
        assert!(parsed.is_ok());

        let ext = parsed.unwrap();
        assert_eq!(Charset::Ext("UTF-8".to_string()), ext.charset);
        assert!(ext.language_tag.is_none());
        assert_eq!(b"\xc2\xa3 and \xe2\x82\xac rates".to_vec(), ext.value);
    }

    #[test]
    fn test_parse_extended_value_missing_language_tag_and_encoding() {
        // From: https://greenbytes.de/tech/tc2231/#attwithfn2231quot2
        // No single quotes at all, so there is no language or value piece.
        assert!(parse_extended_value("foo%20bar.html").is_err());
    }

    #[test]
    fn test_parse_extended_value_partially_formatted() {
        // Only one single quote: the value piece is missing.
        assert!(parse_extended_value("UTF-8'missing third part").is_err());
    }

    #[test]
    fn test_parse_extended_value_partially_formatted_blank() {
        // Only one single quote, with nothing after it: still no value piece.
        assert!(parse_extended_value("blank second part'").is_err());
    }
}
|
||||||
|
|||||||
Reference in New Issue
Block a user