Detect encoding in Response::text() (#256)
* Detect encoding and decode text response Fixes #246 * Try to get encoding from Content-Type header * Remove uchardet encoding detection for now * Add non utf-8 test case for Response::text() * Reduce copies
This commit is contained in:
		| @@ -129,6 +129,7 @@ | ||||
| //! [cookiejar_issue]: https://github.com/seanmonstar/reqwest/issues/14 | ||||
|  | ||||
| extern crate bytes; | ||||
| extern crate encoding_rs; | ||||
| #[macro_use] | ||||
| extern crate futures; | ||||
| extern crate hyper; | ||||
|   | ||||
| @@ -2,7 +2,9 @@ use std::mem; | ||||
| use std::fmt; | ||||
| use std::io::{self, Read}; | ||||
| use std::time::Duration; | ||||
| use std::borrow::Cow; | ||||
|  | ||||
| use encoding_rs::{Encoding, UTF_8}; | ||||
| use futures::{Async, Poll, Stream}; | ||||
| use serde::de::DeserializeOwned; | ||||
| use serde_json; | ||||
| @@ -167,6 +169,11 @@ impl Response { | ||||
|  | ||||
|     /// Get the response text. | ||||
|     /// | ||||
|     /// This method decodes the response body with BOM sniffing | ||||
|     /// and with malformed sequences replaced with the REPLACEMENT CHARACTER. | ||||
|     /// Encoding is determinated from the `charset` parameter of `Content-Type` header, | ||||
|     /// and defaults to `utf-8` if not presented. | ||||
|     /// | ||||
|     /// # Example | ||||
|     /// | ||||
|     /// ```rust | ||||
| @@ -180,9 +187,28 @@ impl Response { | ||||
|         let len = self.headers().get::<::header::ContentLength>() | ||||
|             .map(|ct_len| **ct_len) | ||||
|             .unwrap_or(0); | ||||
|         let mut content = String::with_capacity(len as usize); | ||||
|         self.read_to_string(&mut content).map_err(::error::from)?; | ||||
|         Ok(content) | ||||
|         let mut content = Vec::with_capacity(len as usize); | ||||
|         self.read_to_end(&mut content).map_err(::error::from)?; | ||||
|         let encoding_name = self.headers().get::<::header::ContentType>() | ||||
|             .and_then(|content_type| { | ||||
|                 content_type.get_param("charset") | ||||
|                     .map(|charset| charset.as_str()) | ||||
|             }) | ||||
|             .unwrap_or("utf-8"); | ||||
|         let encoding = Encoding::for_label(encoding_name.as_bytes()).unwrap_or(UTF_8); | ||||
|         // a block because of borrow checker | ||||
|         { | ||||
|             let (text, _, _) = encoding.decode(&content); | ||||
|             match text { | ||||
|                 Cow::Owned(s) => return Ok(s), | ||||
|                 _ => (), | ||||
|             } | ||||
|         } | ||||
|         unsafe { | ||||
|             // decoding returned Cow::Borrowed, meaning these bytes | ||||
|             // are already valid utf8 | ||||
|             Ok(String::from_utf8_unchecked(content)) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Copy the response body into a writer. | ||||
|   | ||||
		Reference in New Issue
	
	Block a user