diff --git a/Cargo.toml b/Cargo.toml index fdcaa89..58e3cca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ categories = ["web-programming::http-client"] [dependencies] bytes = "0.4" +encoding_rs = "0.7" futures = "0.1.15" hyper = "0.11.9" hyper-tls = "0.1.2" diff --git a/src/lib.rs b/src/lib.rs index 18c6ebe..648dff5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -129,6 +129,7 @@ //! [cookiejar_issue]: https://github.com/seanmonstar/reqwest/issues/14 extern crate bytes; +extern crate encoding_rs; #[macro_use] extern crate futures; extern crate hyper; diff --git a/src/response.rs b/src/response.rs index f2defe9..6cfab3a 100644 --- a/src/response.rs +++ b/src/response.rs @@ -2,7 +2,9 @@ use std::mem; use std::fmt; use std::io::{self, Read}; use std::time::Duration; +use std::borrow::Cow; +use encoding_rs::{Encoding, UTF_8}; use futures::{Async, Poll, Stream}; use serde::de::DeserializeOwned; use serde_json; @@ -167,6 +169,11 @@ impl Response { /// Get the response text. /// + /// This method decodes the response body with BOM sniffing + /// and with malformed sequences replaced with the REPLACEMENT CHARACTER. + /// Encoding is determined from the `charset` parameter of the `Content-Type` header, + /// and defaults to `utf-8` if not present.
+ /// /// # Example /// /// ```rust @@ -180,9 +187,28 @@ impl Response { let len = self.headers().get::<::header::ContentLength>() .map(|ct_len| **ct_len) .unwrap_or(0); - let mut content = String::with_capacity(len as usize); - self.read_to_string(&mut content).map_err(::error::from)?; - Ok(content) + let mut content = Vec::with_capacity(len as usize); + self.read_to_end(&mut content).map_err(::error::from)?; + let encoding_name = self.headers().get::<::header::ContentType>() + .and_then(|content_type| { + content_type.get_param("charset") + .map(|charset| charset.as_str()) + }) + .unwrap_or("utf-8"); + let encoding = Encoding::for_label(encoding_name.as_bytes()).unwrap_or(UTF_8); + // a block because of borrow checker + { + let (text, _, _) = encoding.decode(&content); + match text { + Cow::Owned(s) => return Ok(s), + _ => (), + } + } + unsafe { + // decoding returned Cow::Borrowed, meaning these bytes + // are already valid utf8 + Ok(String::from_utf8_unchecked(content)) + } } /// Copy the response body into a writer. diff --git a/tests/client.rs b/tests/client.rs index 65731e9..099d3c7 100644 --- a/tests/client.rs +++ b/tests/client.rs @@ -38,6 +38,41 @@ fn test_response_text() { assert_eq!(b"Hello", body.as_bytes()); } +#[test] +fn test_response_non_utf_8_text() { + let server = server! 
{ + request: b"\ + GET /text HTTP/1.1\r\n\ + Host: $HOST\r\n\ + User-Agent: $USERAGENT\r\n\ + Accept: */*\r\n\ + Accept-Encoding: gzip\r\n\ + \r\n\ + ", + response: b"\ + HTTP/1.1 200 OK\r\n\ + Server: test\r\n\ + Content-Length: 4\r\n\ + Content-Type: text/plain; charset=gbk\r\n\ + \r\n\ + \xc4\xe3\xba\xc3\ + " + }; + + let url = format!("http://{}/text", server.addr()); + let mut res = reqwest::get(&url).unwrap(); + assert_eq!(res.url().as_str(), &url); + assert_eq!(res.status(), reqwest::StatusCode::Ok); + assert_eq!(res.headers().get(), + Some(&reqwest::header::Server::new("test".to_string()))); + assert_eq!(res.headers().get(), + Some(&reqwest::header::ContentLength(4))); + + let body = res.text().unwrap(); + assert_eq!("你好", &body); + assert_eq!(b"\xe4\xbd\xa0\xe5\xa5\xbd", body.as_bytes()); // Now it's utf-8 +} + #[test] fn test_response_copy_to() { let server = server! {