Detect encoding in Response::text() (#256)
* Detect encoding and decode text response Fixes #246 * Try to get encoding from Content-Type header * Remove uchardet encoding detection for now * Add non utf-8 test case for Response::text() * Reduce copies
This commit is contained in:
@@ -11,6 +11,7 @@ categories = ["web-programming::http-client"]
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
bytes = "0.4"
|
bytes = "0.4"
|
||||||
|
encoding_rs = "0.7"
|
||||||
futures = "0.1.15"
|
futures = "0.1.15"
|
||||||
hyper = "0.11.9"
|
hyper = "0.11.9"
|
||||||
hyper-tls = "0.1.2"
|
hyper-tls = "0.1.2"
|
||||||
|
|||||||
@@ -129,6 +129,7 @@
|
|||||||
//! [cookiejar_issue]: https://github.com/seanmonstar/reqwest/issues/14
|
//! [cookiejar_issue]: https://github.com/seanmonstar/reqwest/issues/14
|
||||||
|
|
||||||
extern crate bytes;
|
extern crate bytes;
|
||||||
|
extern crate encoding_rs;
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate futures;
|
extern crate futures;
|
||||||
extern crate hyper;
|
extern crate hyper;
|
||||||
|
|||||||
@@ -2,7 +2,9 @@ use std::mem;
|
|||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::io::{self, Read};
|
use std::io::{self, Read};
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
use encoding_rs::{Encoding, UTF_8};
|
||||||
use futures::{Async, Poll, Stream};
|
use futures::{Async, Poll, Stream};
|
||||||
use serde::de::DeserializeOwned;
|
use serde::de::DeserializeOwned;
|
||||||
use serde_json;
|
use serde_json;
|
||||||
@@ -167,6 +169,11 @@ impl Response {
|
|||||||
|
|
||||||
/// Get the response text.
|
/// Get the response text.
|
||||||
///
|
///
|
||||||
|
/// This method decodes the response body with BOM sniffing
|
||||||
|
/// and with malformed sequences replaced with the REPLACEMENT CHARACTER.
|
||||||
|
/// Encoding is determined from the `charset` parameter of the `Content-Type` header,
|
||||||
|
/// and defaults to `utf-8` if not present.
|
||||||
|
///
|
||||||
/// # Example
|
/// # Example
|
||||||
///
|
///
|
||||||
/// ```rust
|
/// ```rust
|
||||||
@@ -180,9 +187,28 @@ impl Response {
|
|||||||
let len = self.headers().get::<::header::ContentLength>()
|
let len = self.headers().get::<::header::ContentLength>()
|
||||||
.map(|ct_len| **ct_len)
|
.map(|ct_len| **ct_len)
|
||||||
.unwrap_or(0);
|
.unwrap_or(0);
|
||||||
let mut content = String::with_capacity(len as usize);
|
let mut content = Vec::with_capacity(len as usize);
|
||||||
self.read_to_string(&mut content).map_err(::error::from)?;
|
self.read_to_end(&mut content).map_err(::error::from)?;
|
||||||
Ok(content)
|
let encoding_name = self.headers().get::<::header::ContentType>()
|
||||||
|
.and_then(|content_type| {
|
||||||
|
content_type.get_param("charset")
|
||||||
|
.map(|charset| charset.as_str())
|
||||||
|
})
|
||||||
|
.unwrap_or("utf-8");
|
||||||
|
let encoding = Encoding::for_label(encoding_name.as_bytes()).unwrap_or(UTF_8);
|
||||||
|
// a block because of borrow checker
|
||||||
|
{
|
||||||
|
let (text, _, _) = encoding.decode(&content);
|
||||||
|
match text {
|
||||||
|
Cow::Owned(s) => return Ok(s),
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
unsafe {
|
||||||
|
// decoding returned Cow::Borrowed, meaning these bytes
|
||||||
|
// are already valid utf8
|
||||||
|
Ok(String::from_utf8_unchecked(content))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Copy the response body into a writer.
|
/// Copy the response body into a writer.
|
||||||
|
|||||||
@@ -38,6 +38,41 @@ fn test_response_text() {
|
|||||||
assert_eq!(b"Hello", body.as_bytes());
|
assert_eq!(b"Hello", body.as_bytes());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_response_non_utf_8_text() {
|
||||||
|
let server = server! {
|
||||||
|
request: b"\
|
||||||
|
GET /text HTTP/1.1\r\n\
|
||||||
|
Host: $HOST\r\n\
|
||||||
|
User-Agent: $USERAGENT\r\n\
|
||||||
|
Accept: */*\r\n\
|
||||||
|
Accept-Encoding: gzip\r\n\
|
||||||
|
\r\n\
|
||||||
|
",
|
||||||
|
response: b"\
|
||||||
|
HTTP/1.1 200 OK\r\n\
|
||||||
|
Server: test\r\n\
|
||||||
|
Content-Length: 4\r\n\
|
||||||
|
Content-Type: text/plain; charset=gbk\r\n\
|
||||||
|
\r\n\
|
||||||
|
\xc4\xe3\xba\xc3\
|
||||||
|
"
|
||||||
|
};
|
||||||
|
|
||||||
|
let url = format!("http://{}/text", server.addr());
|
||||||
|
let mut res = reqwest::get(&url).unwrap();
|
||||||
|
assert_eq!(res.url().as_str(), &url);
|
||||||
|
assert_eq!(res.status(), reqwest::StatusCode::Ok);
|
||||||
|
assert_eq!(res.headers().get(),
|
||||||
|
Some(&reqwest::header::Server::new("test".to_string())));
|
||||||
|
assert_eq!(res.headers().get(),
|
||||||
|
Some(&reqwest::header::ContentLength(4)));
|
||||||
|
|
||||||
|
let body = res.text().unwrap();
|
||||||
|
assert_eq!("你好", &body);
|
||||||
|
assert_eq!(b"\xe4\xbd\xa0\xe5\xa5\xbd", body.as_bytes()); // Now it's utf-8
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_response_copy_to() {
|
fn test_response_copy_to() {
|
||||||
let server = server! {
|
let server = server! {
|
||||||
|
|||||||
Reference in New Issue
Block a user