Detect encoding in Response::text() (#256)
* Detect encoding and decode text response Fixes #246 * Try to get encoding from Content-Type header * Remove uchardet encoding detection for now * Add non utf-8 test case for Response::text() * Reduce copies
This commit is contained in:
@@ -11,6 +11,7 @@ categories = ["web-programming::http-client"]
|
||||
|
||||
[dependencies]
|
||||
bytes = "0.4"
|
||||
encoding_rs = "0.7"
|
||||
futures = "0.1.15"
|
||||
hyper = "0.11.9"
|
||||
hyper-tls = "0.1.2"
|
||||
|
||||
@@ -129,6 +129,7 @@
|
||||
//! [cookiejar_issue]: https://github.com/seanmonstar/reqwest/issues/14
|
||||
|
||||
extern crate bytes;
|
||||
extern crate encoding_rs;
|
||||
#[macro_use]
|
||||
extern crate futures;
|
||||
extern crate hyper;
|
||||
|
||||
@@ -2,7 +2,9 @@ use std::mem;
|
||||
use std::fmt;
|
||||
use std::io::{self, Read};
|
||||
use std::time::Duration;
|
||||
use std::borrow::Cow;
|
||||
|
||||
use encoding_rs::{Encoding, UTF_8};
|
||||
use futures::{Async, Poll, Stream};
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde_json;
|
||||
@@ -167,6 +169,11 @@ impl Response {
|
||||
|
||||
/// Get the response text.
|
||||
///
|
||||
/// This method decodes the response body with BOM sniffing
|
||||
/// and with malformed sequences replaced with the REPLACEMENT CHARACTER.
|
||||
/// Encoding is determined from the `charset` parameter of the `Content-Type` header,
|
||||
/// and defaults to `utf-8` if not present.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
@@ -180,9 +187,28 @@ impl Response {
|
||||
let len = self.headers().get::<::header::ContentLength>()
|
||||
.map(|ct_len| **ct_len)
|
||||
.unwrap_or(0);
|
||||
let mut content = String::with_capacity(len as usize);
|
||||
self.read_to_string(&mut content).map_err(::error::from)?;
|
||||
Ok(content)
|
||||
let mut content = Vec::with_capacity(len as usize);
|
||||
self.read_to_end(&mut content).map_err(::error::from)?;
|
||||
let encoding_name = self.headers().get::<::header::ContentType>()
|
||||
.and_then(|content_type| {
|
||||
content_type.get_param("charset")
|
||||
.map(|charset| charset.as_str())
|
||||
})
|
||||
.unwrap_or("utf-8");
|
||||
let encoding = Encoding::for_label(encoding_name.as_bytes()).unwrap_or(UTF_8);
|
||||
// scoped block: `text` borrows `content`, and that borrow must end before `content` is moved below
|
||||
{
|
||||
let (text, _, _) = encoding.decode(&content);
|
||||
match text {
|
||||
Cow::Owned(s) => return Ok(s),
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
unsafe {
|
||||
// decoding returned Cow::Borrowed, meaning these bytes
|
||||
// are already valid UTF-8
|
||||
Ok(String::from_utf8_unchecked(content))
|
||||
}
|
||||
}
|
||||
|
||||
/// Copy the response body into a writer.
|
||||
|
||||
@@ -38,6 +38,41 @@ fn test_response_text() {
|
||||
assert_eq!(b"Hello", body.as_bytes());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_response_non_utf_8_text() {
|
||||
let server = server! {
|
||||
request: b"\
|
||||
GET /text HTTP/1.1\r\n\
|
||||
Host: $HOST\r\n\
|
||||
User-Agent: $USERAGENT\r\n\
|
||||
Accept: */*\r\n\
|
||||
Accept-Encoding: gzip\r\n\
|
||||
\r\n\
|
||||
",
|
||||
response: b"\
|
||||
HTTP/1.1 200 OK\r\n\
|
||||
Server: test\r\n\
|
||||
Content-Length: 4\r\n\
|
||||
Content-Type: text/plain; charset=gbk\r\n\
|
||||
\r\n\
|
||||
\xc4\xe3\xba\xc3\
|
||||
"
|
||||
};
|
||||
|
||||
let url = format!("http://{}/text", server.addr());
|
||||
let mut res = reqwest::get(&url).unwrap();
|
||||
assert_eq!(res.url().as_str(), &url);
|
||||
assert_eq!(res.status(), reqwest::StatusCode::Ok);
|
||||
assert_eq!(res.headers().get(),
|
||||
Some(&reqwest::header::Server::new("test".to_string())));
|
||||
assert_eq!(res.headers().get(),
|
||||
Some(&reqwest::header::ContentLength(4)));
|
||||
|
||||
let body = res.text().unwrap();
|
||||
assert_eq!("你好", &body);
|
||||
assert_eq!(b"\xe4\xbd\xa0\xe5\xa5\xbd", body.as_bytes()); // Now it's utf-8
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_response_copy_to() {
|
||||
let server = server! {
|
||||
|
||||
Reference in New Issue
Block a user