From d00fc1476596a080317676b5b4d90605e743c19c Mon Sep 17 00:00:00 2001 From: Jonathan Reem Date: Sat, 20 Sep 2014 03:41:43 -0700 Subject: [PATCH 1/6] Store Header Items behind an RWLock This removes the need to receive `&mut self` in `get_ref` and `get.` Since the interior mutability of the RWLock is needed only once, it is safe to change the lifetime of the pointer given by read locks as by then all mutation has been done. Since `set` still requires `&mut self` there is no way to use the interior mutability of the RWLock to modify an existing `&`-reference. However, the use of interior mutability in `get_ref` means that `get_raw` is now actually an unsafe operation because the (now `*const`) pointer could be invalidated by a subsequent call to `get_ref.` Fixes #47 --- src/header/mod.rs | 57 ++++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/src/header/mod.rs b/src/header/mod.rs index c4ce71fc..915c1312 100644 --- a/src/header/mod.rs +++ b/src/header/mod.rs @@ -13,6 +13,7 @@ use std::raw::TraitObject; use std::str::{from_utf8, SendStr, Slice, Owned}; use std::string::raw; use std::collections::hashmap::{HashMap, Entries, Occupied, Vacant}; +use std::sync::RWLock; use uany::UncheckedAnyDowncast; use typeable::Typeable; @@ -24,7 +25,7 @@ use {HttpResult}; pub mod common; /// A trait for any object that will represent a header field and value. -pub trait Header: Typeable { +pub trait Header: Typeable + Send + Sync { /// Returns the name of the header field this belongs to. /// /// The market `Option` is to hint to the type system which implementation @@ -68,7 +69,7 @@ fn header_name() -> &'static str { /// A map of header fields on requests and responses. pub struct Headers { - data: HashMap + data: HashMap> } impl Headers { @@ -92,13 +93,14 @@ impl Headers { raw::from_utf8(name) }; - let item = match headers.data.entry(CaseInsensitive(Owned(name))) { - Vacant(entry) => entry.set(Raw(vec![])), + let name = CaseInsensitive(Owned(name)); + let item = match headers.data.entry(name) { + Vacant(entry) => entry.set(RWLock::new(Raw(vec![]))), Occupied(entry) => entry.into_mut() }; - match *item { - Raw(ref mut raw) => raw.push(value), + match &mut *item.write() { + &Raw(ref mut raw) => raw.push(value), // Unreachable _ => {} }; @@ -113,7 +115,7 @@ impl Headers { /// /// The field is determined by the type of the value being set. pub fn set(&mut self, value: H) { - self.data.insert(CaseInsensitive(Slice(header_name::())), Typed(box value as Box
)); + self.data.insert(CaseInsensitive(Slice(header_name::())), RWLock::new(Typed(box value as Box
))); } /// Get a clone of the header field's value, if it exists. @@ -126,7 +128,7 @@ impl Headers { /// # let mut headers = Headers::new(); /// let content_type = headers.get::(); /// ``` - pub fn get(&mut self) -> Option { + pub fn get(&self) -> Option { self.get_ref().map(|v: &H| v.clone()) } @@ -136,26 +138,29 @@ impl Headers { /// If the header field has already been parsed into a typed header, /// then you *must* access it through that representation. /// + /// This operation is unsafe because the raw representation can be + /// invalidated by lasting too long or by the header being parsed + /// while you still have a reference to the data. + /// /// Example: /// ``` /// # use hyper::header::Headers; /// # let mut headers = Headers::new(); /// let raw_content_type = unsafe { headers.get_raw("content-type") }; /// ``` - pub unsafe fn get_raw(&self, name: &'static str) -> Option<&[Vec]> { + pub unsafe fn get_raw(&self, name: &'static str) -> Option<*const [Vec]> { self.data.find(&CaseInsensitive(Slice(name))).and_then(|item| { - match *item { - Raw(ref raw) => Some(raw.as_slice()), + match *item.read() { + Raw(ref raw) => Some(raw.as_slice() as *const [Vec]), _ => None } }) } /// Get a reference to the header field's value, if it exists. - pub fn get_ref(&mut self) -> Option<&H> { - self.data.find_mut(&CaseInsensitive(Slice(header_name::()))).and_then(|item| { - debug!("get_ref, name={}, val={}", header_name::(), item); - let header = match *item { + pub fn get_ref(&self) -> Option<&H> { + self.data.find(&CaseInsensitive(Slice(header_name::()))).and_then(|item| { + let header = match *item.read() { // Huge borrowck hack here, should be refactored to just return here. Typed(ref typed) if typed.is::() => None, // Typed, wrong type @@ -170,7 +175,7 @@ impl Headers { match header { Some(header) => { - *item = Typed(box header as Box
); + *item.write() = Typed(box header as Box
); Some(item) }, None => { @@ -178,14 +183,15 @@ impl Headers { } } }).and_then(|item| { - debug!("downcasting {}", item); - let ret = match *item { + let read = item.read(); + debug!("downcasting {}", *read); + let ret = match *read { Typed(ref val) => { unsafe { Some(val.downcast_ref_unchecked()) } }, _ => unreachable!() }; - ret + unsafe { transmute::, Option<&H>>(ret) } }) } @@ -229,7 +235,7 @@ impl fmt::Show for Headers { /// An `Iterator` over the fields in a `Headers` map. pub struct HeadersItems<'a> { - inner: Entries<'a, CaseInsensitive, Item> + inner: Entries<'a, CaseInsensitive, RWLock> } impl<'a> Iterator<(&'a str, HeaderView<'a>)> for HeadersItems<'a> { @@ -242,12 +248,12 @@ impl<'a> Iterator<(&'a str, HeaderView<'a>)> for HeadersItems<'a> { } /// Returned with the `HeadersItems` iterator. -pub struct HeaderView<'a>(&'a Item); +pub struct HeaderView<'a>(&'a RWLock); impl<'a> fmt::Show for HeaderView<'a> { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { let HeaderView(item) = *self; - item.fmt(fmt) + item.read().fmt(fmt) } } @@ -265,7 +271,7 @@ impl Mutable for Headers { enum Item { Raw(Vec>), - Typed(Box
) + Typed(Box
) } impl fmt::Show for Item { @@ -341,7 +347,7 @@ mod tests { #[test] fn test_from_raw() { - let mut headers = Headers::from_raw(&mut mem("Content-Length: 10\r\n\r\n")).unwrap(); + let headers = Headers::from_raw(&mut mem("Content-Length: 10\r\n\r\n")).unwrap(); assert_eq!(headers.get_ref(), Some(&ContentLength(10))); } @@ -380,8 +386,9 @@ mod tests { #[test] fn test_different_structs_for_same_header() { - let mut headers = Headers::from_raw(&mut mem("Content-Length: 10\r\n\r\n")).unwrap(); + let headers = Headers::from_raw(&mut mem("Content-Length: 10\r\n\r\n")).unwrap(); let ContentLength(_) = headers.get::().unwrap(); assert!(headers.get::().is_none()); } } + From 90dbef1d03339d88e7dfa1a3ec2ffc096d11e5aa Mon Sep 17 00:00:00 2001 From: Jonathan Reem Date: Sat, 20 Sep 2014 03:45:28 -0700 Subject: [PATCH 2/6] Add tests for double-reads for Header to ensure safety. --- src/header/mod.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/header/mod.rs b/src/header/mod.rs index 915c1312..59594a34 100644 --- a/src/header/mod.rs +++ b/src/header/mod.rs @@ -390,5 +390,20 @@ mod tests { let ContentLength(_) = headers.get::().unwrap(); assert!(headers.get::().is_none()); } + + #[test] + fn test_multiple_reads() { + let headers = Headers::from_raw(&mut mem("Content-Length: 10\r\n\r\n")).unwrap(); + let ContentLength(one) = headers.get::().unwrap(); + let ContentLength(two) = headers.get::().unwrap(); + assert_eq!(one, two); + } + + #[test] + fn test_different_reads() { + let headers = Headers::from_raw(&mut mem("Content-Length: 10\r\nContent-Type: text/plain\r\n\r\n")).unwrap(); + let ContentLength(_) = headers.get::().unwrap(); + let ContentType(_) = headers.get::().unwrap(); + } } From e85ae48e16b90bf214df20c6fe0fc84a828dd568 Mon Sep 17 00:00:00 2001 From: Jonathan Reem Date: Sat, 20 Sep 2014 03:45:54 -0700 Subject: [PATCH 3/6] Update server and client for changes in Headers mutability. --- src/client/response.rs | 2 +- src/server/request.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/client/response.rs b/src/client/response.rs index 3474ac06..d8ad69d3 100644 --- a/src/client/response.rs +++ b/src/client/response.rs @@ -27,7 +27,7 @@ impl Response { pub fn new(stream: Box) -> HttpResult { let mut stream = BufferedReader::new(stream.abstract()); let (version, status) = try!(read_status_line(&mut stream)); - let mut headers = try!(header::Headers::from_raw(&mut stream)); + let headers = try!(header::Headers::from_raw(&mut stream)); debug!("{} {}", version, status); debug!("{}", headers); diff --git a/src/server/request.rs b/src/server/request.rs index 39f81d58..4aa0e58a 100644 --- a/src/server/request.rs +++ b/src/server/request.rs @@ -39,7 +39,7 @@ impl Request { let remote_addr = try_io!(stream.peer_name()); let mut stream = BufferedReader::new(stream.abstract()); let (method, uri, version) = try!(read_request_line(&mut stream)); - let mut headers = try!(Headers::from_raw(&mut stream)); + let headers = try!(Headers::from_raw(&mut stream)); debug!("{} {} {}", method, uri, version); debug!("{}", headers); From 91cc29e0aa77a948c5fb7894bf851a02382c3123 Mon Sep 17 00:00:00 2001 From: Jonathan Reem Date: Sat, 20 Sep 2014 04:03:55 -0700 Subject: [PATCH 4/6] Fixed a race condition in `get_ref` If two threads attempted to `get_ref` the same Header under two representations, then it was possible that the second thread would overwrite the work of the first thread, causing the first thread to do an unchecked downcast to the wrong type. In the case where they were the same Header representation, then the only damage would be that it would be parsed twice and would possibly return a false negative in one thread. The new code checks that it was not a queued lock and will not override the work of another thread, but will accept it if the other thread parsed the header as the same type. This also prevents duplicate parsing. --- src/header/mod.rs | 59 +++++++++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/src/header/mod.rs b/src/header/mod.rs index 59594a34..6d8319c7 100644 --- a/src/header/mod.rs +++ b/src/header/mod.rs @@ -160,38 +160,53 @@ impl Headers { /// Get a reference to the header field's value, if it exists. pub fn get_ref(&self) -> Option<&H> { self.data.find(&CaseInsensitive(Slice(header_name::()))).and_then(|item| { - let header = match *item.read() { + let done = match *item.read() { // Huge borrowck hack here, should be refactored to just return here. - Typed(ref typed) if typed.is::() => None, - // Typed, wrong type + Typed(ref typed) if typed.is::() => true, + + // Typed, wrong type. Typed(_) => return None, - Raw(ref raw) => match Header::parse_header(raw.as_slice()) { - Some::(h) => { - Some(h) - }, - None => return None - }, + + // Raw, work to do. + Raw(_) => false, }; - match header { - Some(header) => { - *item.write() = Typed(box header as Box
); - Some(item) - }, - None => { - Some(item) + // borrowck hack continued + if done { return Some(item); } + + // Take out a write lock to do the parsing and mutation. + let mut write = item.write(); + + let header = match *write { + // Since this lock can queue, it's possible another thread just + // did the work for us. + // + // Check they inserted the correct type and move on. + Typed(ref typed) if typed.is::() => return Some(item), + + // Wrong type, another thread got here before us and parsed + // as a different representation. + Typed(_) => return None, + + // We are first in the queue or the only ones, so do the actual + // work of parsing and mutation. + Raw(ref raw) => match Header::parse_header(raw.as_slice()) { + Some::(h) => h, + None => return None } - } - }).and_then(|item| { + }; + + // Mutate in the raw case. + *write = Typed(box header as Box
); + Some(item) + }).map(|item| { let read = item.read(); debug!("downcasting {}", *read); let ret = match *read { - Typed(ref val) => { - unsafe { Some(val.downcast_ref_unchecked()) } - }, + Typed(ref val) => unsafe { val.downcast_ref_unchecked() }, _ => unreachable!() }; - unsafe { transmute::, Option<&H>>(ret) } + unsafe { transmute::<&H, &H>(ret) } }) } From 858a09304af851b9137c47b17d7c5badaa07c250 Mon Sep 17 00:00:00 2001 From: Jonathan Reem Date: Sat, 20 Sep 2014 04:18:25 -0700 Subject: [PATCH 5/6] Remove get and rename get_ref to get Since `get_ref` (now `get`) takes `&self` there is no need for a special cloning method. --- src/client/request.rs | 4 ++-- src/client/response.rs | 4 ++-- src/header/mod.rs | 16 +--------------- src/server/request.rs | 2 +- src/server/response.rs | 4 ++-- 5 files changed, 8 insertions(+), 22 deletions(-) diff --git a/src/client/request.rs b/src/client/request.rs index 9c9afc57..4130e986 100644 --- a/src/client/request.rs +++ b/src/client/request.rs @@ -111,7 +111,7 @@ impl Request { let mut chunked = true; let mut len = 0; - match self.headers.get_ref::() { + match self.headers.get::() { Some(cl) => { chunked = false; len = cl.len(); @@ -122,7 +122,7 @@ impl Request { // cant do in match above, thanks borrowck if chunked { //TODO: use CollectionViews (when implemented) to prevent double hash/lookup - let encodings = match self.headers.get::() { + let encodings = match self.headers.get::().map(|h| h.clone()) { Some(common::TransferEncoding(mut encodings)) => { //TODO: check if chunked is already in encodings. use HashSet? encodings.push(common::transfer_encoding::Chunked); diff --git a/src/client/response.rs b/src/client/response.rs index d8ad69d3..7124cf6c 100644 --- a/src/client/response.rs +++ b/src/client/response.rs @@ -33,7 +33,7 @@ impl Response { debug!("{}", headers); let body = if headers.has::() { - match headers.get_ref::() { + match headers.get::() { Some(&TransferEncoding(ref codings)) => { if codings.len() > 1 { debug!("TODO: #2 handle other codings: {}", codings); @@ -49,7 +49,7 @@ impl Response { None => unreachable!() } } else if headers.has::() { - match headers.get_ref::() { + match headers.get::() { Some(&ContentLength(len)) => SizedReader(stream, len), None => unreachable!() } diff --git a/src/header/mod.rs b/src/header/mod.rs index 6d8319c7..6a9c7e98 100644 --- a/src/header/mod.rs +++ b/src/header/mod.rs @@ -118,20 +118,6 @@ impl Headers { self.data.insert(CaseInsensitive(Slice(header_name::())), RWLock::new(Typed(box value as Box
))); } - /// Get a clone of the header field's value, if it exists. - /// - /// Example: - /// - /// ``` - /// # use hyper::header::Headers; - /// # use hyper::header::common::ContentType; - /// # let mut headers = Headers::new(); - /// let content_type = headers.get::(); - /// ``` - pub fn get(&self) -> Option { - self.get_ref().map(|v: &H| v.clone()) - } - /// Access the raw value of a header, if it exists and has not /// been already parsed. /// @@ -158,7 +144,7 @@ impl Headers { } /// Get a reference to the header field's value, if it exists. - pub fn get_ref(&self) -> Option<&H> { + pub fn get(&self) -> Option<&H> { self.data.find(&CaseInsensitive(Slice(header_name::()))).and_then(|item| { let done = match *item.read() { // Huge borrowck hack here, should be refactored to just return here. diff --git a/src/server/request.rs b/src/server/request.rs index 4aa0e58a..91989dcd 100644 --- a/src/server/request.rs +++ b/src/server/request.rs @@ -46,7 +46,7 @@ impl Request { let body = if headers.has::() { - match headers.get_ref::() { + match headers.get::() { Some(&ContentLength(len)) => SizedReader(stream, len), None => unreachable!() } diff --git a/src/server/response.rs b/src/server/response.rs index aa779a24..800ffc87 100644 --- a/src/server/response.rs +++ b/src/server/response.rs @@ -72,7 +72,7 @@ impl Response { let mut chunked = true; let mut len = 0; - match self.headers.get_ref::() { + match self.headers.get::() { Some(cl) => { chunked = false; len = cl.len(); @@ -83,7 +83,7 @@ impl Response { // cant do in match above, thanks borrowck if chunked { //TODO: use CollectionViews (when implemented) to prevent double hash/lookup - let encodings = match self.headers.get::() { + let encodings = match self.headers.get::().map(|h| h.clone()) { Some(common::TransferEncoding(mut encodings)) => { //TODO: check if chunked is already in encodings. use HashSet? encodings.push(common::transfer_encoding::Chunked); From d3a62fa0d531907b3e61167a57e5480cef810d87 Mon Sep 17 00:00:00 2001 From: Jonathan Reem Date: Sat, 20 Sep 2014 05:52:41 -0700 Subject: [PATCH 6/6] Add get_mut for modifying the typed representation of Headers. Also adds an associated test and updates code to use it instead of cloning and setting when possible. --- src/client/request.rs | 15 ++++++---- src/header/mod.rs | 62 ++++++++++++++++++++++++++++++++---------- src/server/response.rs | 15 ++++++---- 3 files changed, 65 insertions(+), 27 deletions(-) diff --git a/src/client/request.rs b/src/client/request.rs index 4130e986..dc48aeb4 100644 --- a/src/client/request.rs +++ b/src/client/request.rs @@ -121,16 +121,19 @@ impl Request { // cant do in match above, thanks borrowck if chunked { - //TODO: use CollectionViews (when implemented) to prevent double hash/lookup - let encodings = match self.headers.get::().map(|h| h.clone()) { - Some(common::TransferEncoding(mut encodings)) => { + let encodings = match self.headers.get_mut::() { + Some(&common::TransferEncoding(ref mut encodings)) => { //TODO: check if chunked is already in encodings. use HashSet? encodings.push(common::transfer_encoding::Chunked); - encodings + false }, - None => vec![common::transfer_encoding::Chunked] + None => true }; - self.headers.set(common::TransferEncoding(encodings)); + + if encodings { + self.headers.set::( + common::TransferEncoding(vec![common::transfer_encoding::Chunked])) + } } for (name, header) in self.headers.iter() { diff --git a/src/header/mod.rs b/src/header/mod.rs index 6a9c7e98..7dc8e9a5 100644 --- a/src/header/mod.rs +++ b/src/header/mod.rs @@ -15,7 +15,7 @@ use std::string::raw; use std::collections::hashmap::{HashMap, Entries, Occupied, Vacant}; use std::sync::RWLock; -use uany::UncheckedAnyDowncast; +use uany::{UncheckedAnyDowncast, UncheckedAnyMutDowncast}; use typeable::Typeable; use http::read_header; @@ -62,6 +62,14 @@ impl<'a> UncheckedAnyDowncast<'a> for &'a Header { } } +impl<'a> UncheckedAnyMutDowncast<'a> for &'a mut Header { + #[inline] + unsafe fn downcast_mut_unchecked(self) -> &'a mut T { + let to: TraitObject = transmute_copy(&self); + transmute(to.data) + } +} + fn header_name() -> &'static str { let name = Header::header_name(None::); name @@ -145,6 +153,31 @@ impl Headers { /// Get a reference to the header field's value, if it exists. pub fn get(&self) -> Option<&H> { + self.get_or_parse::().map(|item| { + let read = item.read(); + debug!("downcasting {}", *read); + let ret = match *read { + Typed(ref val) => unsafe { val.downcast_ref_unchecked() }, + _ => unreachable!() + }; + unsafe { transmute::<&H, &H>(ret) } + }) + } + + /// Get a mutable reference to the header field's value, if it exists. + pub fn get_mut(&mut self) -> Option<&mut H> { + self.get_or_parse::().map(|item| { + let mut write = item.write(); + debug!("downcasting {}", *write); + let ret = match *&mut *write { + Typed(ref mut val) => unsafe { val.downcast_mut_unchecked() }, + _ => unreachable!() + }; + unsafe { transmute::<&mut H, &mut H>(ret) } + }) + } + + fn get_or_parse(&self) -> Option<&RWLock> { self.data.find(&CaseInsensitive(Slice(header_name::()))).and_then(|item| { let done = match *item.read() { // Huge borrowck hack here, should be refactored to just return here. @@ -185,14 +218,6 @@ impl Headers { // Mutate in the raw case. *write = Typed(box header as Box
); Some(item) - }).map(|item| { - let read = item.read(); - debug!("downcasting {}", *read); - let ret = match *read { - Typed(ref val) => unsafe { val.downcast_ref_unchecked() }, - _ => unreachable!() - }; - unsafe { transmute::<&H, &H>(ret) } }) } @@ -349,7 +374,7 @@ mod tests { #[test] fn test_from_raw() { let headers = Headers::from_raw(&mut mem("Content-Length: 10\r\n\r\n")).unwrap(); - assert_eq!(headers.get_ref(), Some(&ContentLength(10))); + assert_eq!(headers.get(), Some(&ContentLength(10))); } #[test] @@ -388,23 +413,30 @@ mod tests { #[test] fn test_different_structs_for_same_header() { let headers = Headers::from_raw(&mut mem("Content-Length: 10\r\n\r\n")).unwrap(); - let ContentLength(_) = headers.get::().unwrap(); + let ContentLength(_) = *headers.get::().unwrap(); assert!(headers.get::().is_none()); } #[test] fn test_multiple_reads() { let headers = Headers::from_raw(&mut mem("Content-Length: 10\r\n\r\n")).unwrap(); - let ContentLength(one) = headers.get::().unwrap(); - let ContentLength(two) = headers.get::().unwrap(); + let ContentLength(one) = *headers.get::().unwrap(); + let ContentLength(two) = *headers.get::().unwrap(); assert_eq!(one, two); } #[test] fn test_different_reads() { let headers = Headers::from_raw(&mut mem("Content-Length: 10\r\nContent-Type: text/plain\r\n\r\n")).unwrap(); - let ContentLength(_) = headers.get::().unwrap(); - let ContentType(_) = headers.get::().unwrap(); + let ContentLength(_) = *headers.get::().unwrap(); + let ContentType(_) = *headers.get::().unwrap(); + } + + #[test] + fn test_get_mutable() { + let mut headers = Headers::from_raw(&mut mem("Content-Length: 10\r\nContent-Type: text/plain\r\n\r\n")).unwrap(); + *headers.get_mut::().unwrap() = ContentLength(20); + assert_eq!(*headers.get::().unwrap(), ContentLength(20)); } } diff --git a/src/server/response.rs b/src/server/response.rs index 800ffc87..040d1796 100644 --- a/src/server/response.rs +++ b/src/server/response.rs @@ -82,16 +82,19 @@ impl Response { // cant do in match above, thanks borrowck if chunked { - //TODO: use CollectionViews (when implemented) to prevent double hash/lookup - let encodings = match self.headers.get::().map(|h| h.clone()) { - Some(common::TransferEncoding(mut encodings)) => { + let encodings = match self.headers.get_mut::() { + Some(&common::TransferEncoding(ref mut encodings)) => { //TODO: check if chunked is already in encodings. use HashSet? encodings.push(common::transfer_encoding::Chunked); - encodings + false }, - None => vec![common::transfer_encoding::Chunked] + None => true }; - self.headers.set(common::TransferEncoding(encodings)); + + if encodings { + self.headers.set::( + common::TransferEncoding(vec![common::transfer_encoding::Chunked])) + } } for (name, header) in self.headers.iter() {