From 5c30ef30ecc32d800a880a334bcbcd05fb302bb1 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Mon, 5 Jun 2017 15:03:04 -0700 Subject: [PATCH] More HPACK encoding work --- src/hpack/encoder.rs | 272 +++++++++++++++++++++++++++++++++++++-- src/hpack/header.rs | 14 +- src/hpack/huffman/mod.rs | 34 ----- src/hpack/table.rs | 127 ++++++++++++++---- src/hpack/test.rs | 30 ++--- 5 files changed, 391 insertions(+), 86 deletions(-) diff --git a/src/hpack/encoder.rs b/src/hpack/encoder.rs index 41c0dd1..6fd6704 100644 --- a/src/hpack/encoder.rs +++ b/src/hpack/encoder.rs @@ -16,9 +16,9 @@ pub enum EncoderError { } impl Encoder { - pub fn new() -> Encoder { + pub fn new(max_size: usize, capacity: usize) -> Encoder { Encoder { - table: Table::with_capacity(0), + table: Table::new(max_size, capacity), max_size_update: None, } } @@ -41,29 +41,39 @@ impl Encoder { fn encode_header(&mut self, header: Header, dst: &mut BytesMut) -> Result<(), EncoderError> { - if header.is_sensitive() { - unimplemented!(); - } - match self.table.index(header) { - Index::Indexed(idx, _header) => { + Index::Indexed(idx, header) => { + assert!(!header.is_sensitive()); encode_int(idx, 7, 0x80, dst); } Index::Name(idx, header) => { - encode_int(idx, 4, 0, dst); + if header.is_sensitive() { + encode_int(idx, 4, 0b10000, dst); + } else { + encode_int(idx, 4, 0, dst); + } + encode_str(header.value_slice(), dst); } Index::Inserted(header) => { + assert!(!header.is_sensitive()); dst.put_u8(0b01000000); encode_str(header.name().as_slice(), dst); encode_str(header.value_slice(), dst); } Index::InsertedValue(idx, header) => { + assert!(!header.is_sensitive()); + encode_int(idx, 6, 0b01000000, dst); encode_str(header.value_slice(), dst); } Index::NotIndexed(header) => { - dst.put_u8(0); + if header.is_sensitive() { + dst.put_u8(0b10000); + } else { + dst.put_u8(0); + } + encode_str(header.name().as_slice(), dst); encode_str(header.value_slice(), dst); } @@ -73,6 +83,12 @@ impl Encoder { } } +impl Default for Encoder { + fn default() -> Encoder { + Encoder::new(4096, 0) + } +} + fn encode_str(val: &[u8], dst: &mut BytesMut) { use std::io::Cursor; @@ -153,3 +169,241 @@ fn encode_int( fn encode_int_one_byte(value: usize, prefix_bits: usize) -> bool { value < (1 << prefix_bits) - 1 } + +#[cfg(test)] +mod test { + use super::*; + use hpack::Header; + use http::*; + + #[test] + fn test_encode_method_get() { + let mut encoder = Encoder::default(); + let res = encode(&mut encoder, vec![method("GET")]); + assert_eq!(*res, [0x80 | 2]); + assert_eq!(encoder.table.len(), 0); + } + + #[test] + fn test_encode_method_post() { + let mut encoder = Encoder::default(); + let res = encode(&mut encoder, vec![method("POST")]); + assert_eq!(*res, [0x80 | 3]); + assert_eq!(encoder.table.len(), 0); + } + + #[test] + fn test_encode_method_patch() { + let mut encoder = Encoder::default(); + let res = encode(&mut encoder, vec![method("PATCH")]); + + assert_eq!(res[0], 0b01000000 | 2); // Incremental indexing w/ name pulled from table + assert_eq!(res[1], 0x80 | 5); // header value w/ huffman coding + + assert_eq!("PATCH", huff_decode(&res[2..7])); + assert_eq!(encoder.table.len(), 1); + + let res = encode(&mut encoder, vec![method("PATCH")]); + + assert_eq!(1 << 7 | 62, res[0]); + assert_eq!(1, res.len()); + } + + #[test] + fn test_repeated_headers_are_indexed() { + let mut encoder = Encoder::default(); + let res = encode(&mut encoder, vec![header("foo", "hello")]); + + assert_eq!(&[0b01000000, 0x80 | 2], &res[0..2]); + assert_eq!("foo", huff_decode(&res[2..4])); + assert_eq!(0x80 | 4, res[4]); + assert_eq!("hello", huff_decode(&res[5..])); + assert_eq!(9, res.len()); + + assert_eq!(1, encoder.table.len()); + + let res = encode(&mut encoder, vec![header("foo", "hello")]); + assert_eq!([0x80 | 62], *res); + + assert_eq!(encoder.table.len(), 1); + } + + #[test] + fn test_evicting_headers() { + let mut encoder = Encoder::default(); + + // Fill the table + for i in 0..64 { + let key = format!("x-hello-world-{:02}", i); + let res = encode(&mut encoder, vec![header(&key, &key)]); + + assert_eq!(&[0b01000000, 0x80 | 12], &res[0..2]); + assert_eq!(key, huff_decode(&res[2..14])); + assert_eq!(0x80 | 12, res[14]); + assert_eq!(key, huff_decode(&res[15..])); + assert_eq!(27, res.len()); + + // Make sure the header can be found... + let res = encode(&mut encoder, vec![header(&key, &key)]); + + // Only check that it is found + assert_eq!(0x80, res[0] & 0x80); + } + + assert_eq!(4096, encoder.table.size()); + assert_eq!(64, encoder.table.len()); + + // Find existing headers + for i in 0..64 { + let key = format!("x-hello-world-{:02}", i); + let res = encode(&mut encoder, vec![header(&key, &key)]); + assert_eq!(0x80, res[0] & 0x80); + } + + // Insert a new header + let key = "x-hello-world-64"; + let res = encode(&mut encoder, vec![header(key, key)]); + + assert_eq!(&[0b01000000, 0x80 | 12], &res[0..2]); + assert_eq!(key, huff_decode(&res[2..14])); + assert_eq!(0x80 | 12, res[14]); + assert_eq!(key, huff_decode(&res[15..])); + assert_eq!(27, res.len()); + + assert_eq!(64, encoder.table.len()); + + // Now try encoding entries that should exist in the table + for i in 1..65 { + let key = format!("x-hello-world-{:02}", i); + let res = encode(&mut encoder, vec![header(&key, &key)]); + assert_eq!(0x80 | (i + 61), res[0]); + } + } + + #[test] + fn test_large_headers_are_not_indexed() { + let mut encoder = Encoder::new(128, 0); + let key = "hello-world-hello-world-HELLO-zzz"; + + let res = encode(&mut encoder, vec![header(key, key)]); + + assert_eq!(&[0, 0x80 | 25], &res[..2]); + + assert_eq!(0, encoder.table.len()); + assert_eq!(0, encoder.table.size()); + } + + #[test] + fn test_sensitive_headers_are_never_indexed() { + use http::header::{HeaderName, HeaderValue}; + + let name = "my-password".parse().unwrap(); + let mut value = HeaderValue::try_from_bytes(b"12345").unwrap(); + value.set_sensitive(true); + + let header = Header::Field { name: name, value: value }; + + // Now, try to encode the sensitive header + + let mut encoder = Encoder::default(); + let res = encode(&mut encoder, vec![header]); + + assert_eq!(&[0b10000, 0x80 | 8], &res[..2]); + assert_eq!("my-password", huff_decode(&res[2..10])); + assert_eq!(0x80 | 4, res[10]); + assert_eq!("12345", huff_decode(&res[11..])); + + // Now, try to encode a sensitive header w/ a name in the static table + let name = "authorization".parse().unwrap(); + let mut value = HeaderValue::try_from_bytes(b"12345").unwrap(); + value.set_sensitive(true); + + let header = Header::Field { name: name, value: value }; + + let mut encoder = Encoder::default(); + let res = encode(&mut encoder, vec![header]); + + assert_eq!(&[0b11111, 8], &res[..2]); + assert_eq!(0x80 | 4, res[2]); + assert_eq!("12345", huff_decode(&res[3..])); + + // Using the name component of a previously indexed header (without + // sensitive flag set) + + let _ = encode(&mut encoder, vec![self::header("my-password", "not-so-secret")]); + + let name = "my-password".parse().unwrap(); + let mut value = HeaderValue::try_from_bytes(b"12345").unwrap(); + value.set_sensitive(true); + + let header = Header::Field { name: name, value: value }; + let res = encode(&mut encoder, vec![header]); + + assert_eq!(&[0b11111, 47], &res[..2]); + assert_eq!(0x80 | 4, res[2]); + assert_eq!("12345", huff_decode(&res[3..])); + } + + #[test] + fn test_content_length_value_not_indexed() { + let mut encoder = Encoder::default(); + let res = encode(&mut encoder, vec![header("content-length", "1234")]); + + assert_eq!(&[15, 13, 0x80 | 3], &res[0..3]); + assert_eq!("1234", huff_decode(&res[3..])); + assert_eq!(6, res.len()); + } + + #[test] + fn test_at_most_two_values_per_name_indexed() { + } + + #[test] + fn test_index_header_with_duplicate_name_does_not_evict() { + } + + #[test] + fn test_max_size_zero() { + } + + #[test] + fn test_increasing_table_size() { + } + + #[test] + fn test_decreasing_table_size_without_eviction() { + } + + #[test] + fn test_decreasing_table_size_with_eviction() { + } + + #[test] + fn test_encoding_into_undersized_buf() { + // Test hitting end at multiple points. + } + + + fn encode(e: &mut Encoder, hdrs: Vec
) -> BytesMut { + let mut dst = BytesMut::with_capacity(1024); + e.encode(hdrs, &mut dst); + dst + } + + fn method(s: &str) -> Header { + Header::Method(Method::from_bytes(s.as_bytes()).unwrap()) + } + + fn header(name: &str, val: &str) -> Header { + use http::header::{HeaderName, HeaderValue}; + + let name = HeaderName::from_bytes(name.as_bytes()).unwrap(); + let value = HeaderValue::try_from_bytes(val.as_bytes()).unwrap(); + + Header::Field { name: name, value: value } + } + + fn huff_decode(src: &[u8]) -> BytesMut { + huffman::decode(src).unwrap() + } +} diff --git a/src/hpack/header.rs b/src/hpack/header.rs index 5a87892..9d033bd 100644 --- a/src/hpack/header.rs +++ b/src/hpack/header.rs @@ -56,7 +56,7 @@ impl Header { Ok(Header::Path(value)) } b"status" => { - let status = try!(StatusCode::from_slice(&value)); + let status = try!(StatusCode::from_bytes(&value)); Ok(Header::Status(status)) } _ => { @@ -65,7 +65,7 @@ impl Header { } } else { let name = try!(HeaderName::from_bytes(&name)); - let value = try!(HeaderValue::try_from_slice(&value)); + let value = try!(HeaderValue::try_from_bytes(&value)); Ok(Header::Field { name: name, value: value }) } @@ -160,7 +160,11 @@ impl Header { } pub fn is_sensitive(&self) -> bool { - false + match *self { + Header::Field { ref value, .. } => value.is_sensitive(), + // TODO: Technically these other header values can be sensitive too. + _ => false, + } } pub fn skip_value_index(&self) -> bool { @@ -193,7 +197,7 @@ impl<'a> Name<'a> { Name::Field(name) => { Ok(Header::Field { name: name.clone(), - value: try!(HeaderValue::try_from_slice(&*value)), + value: try!(HeaderValue::try_from_bytes(&*value)), }) } Name::Authority => { @@ -209,7 +213,7 @@ impl<'a> Name<'a> { Ok(Header::Path(try!(ByteStr::from_utf8(value)))) } Name::Status => { - match StatusCode::from_slice(&value) { + match StatusCode::from_bytes(&value) { Ok(status) => Ok(Header::Status(status)), // TODO: better error handling Err(_) => Err(DecoderError::InvalidStatusCode), diff --git a/src/hpack/huffman/mod.rs b/src/hpack/huffman/mod.rs index 430b155..c11dd48 100644 --- a/src/hpack/huffman/mod.rs +++ b/src/hpack/huffman/mod.rs @@ -67,40 +67,6 @@ pub fn encode(src: &[u8], dst: &mut B) { } } -/* -static size_t encode_huffman(uint8_t *_dst, const uint8_t *src, size_t len) -{ - uint8_t *dst = _dst, *dst_end = dst + len; - const uint8_t *src_end = src + len; - uint64_t bits = 0; - int bits_left = 40; - - while (src != src_end) { - const nghttp2_huff_sym *sym = huff_sym_table + *src++; - bits |= (uint64_t)sym->code << (bits_left - sym->nbits); - bits_left -= sym->nbits; - while (bits_left <= 32) { - *dst++ = bits >> 32; - bits <<= 8; - bits_left += 8; - if (dst == dst_end) { - return 0; - } - } - } - - if (bits_left != 40) { - bits |= ((uint64_t)1 << bits_left) - 1; - *dst++ = bits >> 32; - } - if (dst == dst_end) { - return 0; - } - - return dst - _dst; -} - */ - impl Decoder { fn new() -> Decoder { Decoder { diff --git a/src/hpack/table.rs b/src/hpack/table.rs index 99cc62e..5d0468c 100644 --- a/src/hpack/table.rs +++ b/src/hpack/table.rs @@ -4,7 +4,7 @@ use fnv::FnvHasher; use http::method; use http::header::{self, HeaderName, HeaderValue}; -use std::mem; +use std::{cmp, mem}; use std::collections::VecDeque; use std::hash::{Hash, Hasher}; @@ -32,7 +32,7 @@ pub enum Index<'a> { Inserted(&'a Header), // Only the value has been inserted - InsertedValue(usize, Header), + InsertedValue(usize, &'a Header), // The header is not indexed by this table NotIndexed(Header), @@ -75,19 +75,28 @@ macro_rules! probe_loop { } impl Table { - pub fn with_capacity(n: usize) -> Table { - if n == 0 { - unimplemented!(); + pub fn new(max_size: usize, capacity: usize) -> Table { + if capacity == 0 { + Table { + mask: 0, + indices: vec![], + slots: VecDeque::new(), + evicted: 0, + size: 0, + max_size: max_size, + } } else { - let capacity = to_raw_capacity(n).next_power_of_two(); + let capacity = cmp::max( + to_raw_capacity(capacity).next_power_of_two(), + 8); Table { mask: capacity.wrapping_sub(1), indices: vec![None; capacity], - slots: VecDeque::with_capacity(n), + slots: VecDeque::with_capacity(usable_capacity(capacity)), evicted: 0, size: 0, - max_size: 4096, + max_size: max_size, } } } @@ -121,7 +130,17 @@ impl Table { } fn index_dynamic(&mut self, header: Header, statik: Option<(usize, bool)>) -> Index { - self.reserve_one(); + if header.len() + self.size < self.max_size || !header.is_sensitive() { + // Only grow internal storage if needed + self.reserve_one(); + } + + if self.indices.is_empty() { + // If `indices` is not empty, then it is impossible for all + // `indices` entries to be `Some`. So, we only need to check for the + // empty case. + return Index::new(statik, header); + } let hash = hash_header(&header); @@ -136,15 +155,17 @@ impl Table { // displacement. let their_dist = probe_distance(self.mask, pos.hash, probe); + let slot_idx = pos.index.wrapping_sub(self.evicted); + if their_dist < dist { // Index robinhood - return self.index_vacant(header, hash, desired_pos, probe); - } else if pos.hash == hash && self.slots[pos.index].header.name() == header.name() { + return self.index_vacant(header, hash, dist, probe, statik); + } else if pos.hash == hash && self.slots[slot_idx].header.name() == header.name() { // Matching name, check values return self.index_occupied(header, pos.index, statik); } } else { - return self.index_vacant(header, hash, desired_pos, probe); + return self.index_vacant(header, hash, dist, probe, statik); } dist += 1; @@ -157,19 +178,59 @@ impl Table { // There already is a match for the given header name. Check if a value // matches. The header will also only be inserted if the table is not at // capacity. - unimplemented!(); + let mut n = 0; + + while n < MAX_VALUES_PER_NAME { + // Compute the real index into the VecDeque + let real_idx = index.wrapping_sub(self.evicted); + + if self.slots[real_idx].header.value_eq(&header) { + // We have a full match! + return Index::Indexed(real_idx + DYN_OFFSET, header); + } + + if let Some(next) = self.slots[real_idx].next { + n = next; + continue; + } + + if header.is_sensitive() { + return Index::Name(real_idx + DYN_OFFSET, header); + } + + // Maybe index + unimplemented!(); + } + + Index::NotIndexed(header) } fn index_vacant(&mut self, header: Header, hash: HashValue, - desired: usize, - probe: usize) + dist: usize, + mut probe: usize, + statik: Option<(usize, bool)>) -> Index { - if self.maybe_evict(header.len()) { - // Maybe step back - unimplemented!(); + if header.is_sensitive() { + return Index::new(statik, header); + } + + if self.update_size(header.len()) { + if dist != 0 { + let back = probe.wrapping_sub(1) & self.mask; + + if let Some(pos) = self.indices[probe] { + let their_dist = probe_distance(self.mask, pos.hash, probe); + + if their_dist < dist { + probe = back; + } + } else { + probe = back; + } + } } // The index is offset by the current # of evicted elements @@ -193,7 +254,6 @@ impl Table { probe_loop!(probe < self.indices.len(), { let pos = &mut self.indices[probe as usize]; - let p = mem::replace(pos, Some(prev)); prev = match mem::replace(pos, Some(prev)) { Some(p) => p, @@ -202,14 +262,18 @@ impl Table { }); } - Index::Inserted(&self.slots[slot_idx].header) + if let Some((n, _)) = statik { + Index::InsertedValue(n, &self.slots[slot_idx].header) + } else { + Index::Inserted(&self.slots[slot_idx].header) + } } - fn maybe_evict(&mut self, len: usize) -> bool { - let target = self.max_size - len; + fn update_size(&mut self, len: usize) -> bool { + self.size += len; let mut ret = false; - while self.size > target { + while self.size > self.max_size { ret = true; self.evict(); } @@ -224,6 +288,10 @@ impl Table { let slot = self.slots.pop_front().unwrap(); let mut probe = desired_pos(self.mask, slot.hash); + // Update the size + self.size -= slot.header.len(); + + // Equivalent to 0.wrapping_add(self.evicted); let pos_idx = self.evicted; // Find the associated position @@ -328,6 +396,19 @@ impl Table { } } +#[cfg(test)] +impl Table { + /// Returns the number of headers in the table + pub fn len(&self) -> usize { + self.slots.len() + } + + /// Returns the table size + pub fn size(&self) -> usize { + self.size + } +} + impl<'a> Index<'a> { fn new(v: Option<(usize, bool)>, e: Header) -> Index<'a> { match v { diff --git a/src/hpack/test.rs b/src/hpack/test.rs index 664a68c..28a673e 100644 --- a/src/hpack/test.rs +++ b/src/hpack/test.rs @@ -3,7 +3,7 @@ extern crate walkdir; extern crate serde; extern crate serde_json; -use super::{Entry, Decoder}; +use super::{Header, Decoder}; use self::hex::FromHex; use self::serde_json::Value; @@ -111,24 +111,24 @@ struct Case { header_table_size: Option, } -fn key_str(e: &Entry) -> &str { +fn key_str(e: &Header) -> &str { match *e { - Entry::Header { ref name, .. } => name.as_str(), - Entry::Authority(..) => ":authority", - Entry::Method(..) => ":method", - Entry::Scheme(..) => ":scheme", - Entry::Path(..) => ":path", - Entry::Status(..) => ":status", + Header::Field { ref name, .. } => name.as_str(), + Header::Authority(..) => ":authority", + Header::Method(..) => ":method", + Header::Scheme(..) => ":scheme", + Header::Path(..) => ":path", + Header::Status(..) => ":status", } } -fn value_str(e: &Entry) -> &str { +fn value_str(e: &Header) -> &str { match *e { - Entry::Header { ref value, .. } => value.to_str().unwrap(), - Entry::Authority(ref v) => &**v, - Entry::Method(ref m) => m.as_str(), - Entry::Scheme(ref v) => &**v, - Entry::Path(ref v) => &**v, - Entry::Status(ref v) => v.as_str(), + Header::Field { ref value, .. } => value.to_str().unwrap(), + Header::Authority(ref v) => &**v, + Header::Method(ref m) => m.as_str(), + Header::Scheme(ref v) => &**v, + Header::Path(ref v) => &**v, + Header::Status(ref v) => v.as_str(), } }