From e41349572a9ef1ecd905268243e2c3fd75523483 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Thu, 1 Jun 2017 12:35:20 -0700 Subject: [PATCH] Add huffman encoding --- src/hpack/encoder.rs | 25 +- src/hpack/huffman/mod.rs | 119 +++++++- src/hpack/huffman/table.rs | 263 +++++++++++++++++- util/genhuff/Cargo.toml | 6 + .../genhuff.rs => util/genhuff/src/main.rs | 24 +- 5 files changed, 428 insertions(+), 9 deletions(-) create mode 100644 util/genhuff/Cargo.toml rename src/bin/genhuff.rs => util/genhuff/src/main.rs (97%) diff --git a/src/hpack/encoder.rs b/src/hpack/encoder.rs index dc4f0b2..863c22e 100644 --- a/src/hpack/encoder.rs +++ b/src/hpack/encoder.rs @@ -1 +1,24 @@ -pub struct Encoder; +use http::header::{HeaderMap, HeaderName, HeaderValue}; +use bytes::BytesMut; + +pub struct Encoder { + table: HeaderMap<()>, + + // The remote sent a max size update, we must shrink the table on next call + // to encode. + max_size_update: Option, + + // Current max table size + max_size: usize, +} + +pub enum EncoderError { +} + +impl Encoder { + pub fn encode<'a, I>(&mut self, headers: I, dst: &mut BytesMut) -> Result<(), EncoderError> + where I: IntoIterator, + { + unimplemented!(); + } +} diff --git a/src/hpack/huffman/mod.rs b/src/hpack/huffman/mod.rs index 9b13e7e..2c0ddb4 100644 --- a/src/hpack/huffman/mod.rs +++ b/src/hpack/huffman/mod.rs @@ -1,6 +1,6 @@ mod table; -use self::table::DECODE_TABLE; +use self::table::{ENCODE_TABLE, DECODE_TABLE}; use hpack::DecoderError; use bytes::{BytesMut, BufMut}; @@ -18,9 +18,11 @@ const DECODED: u8 = 2; const ERROR: u8 = 4; pub fn decode(src: &[u8]) -> Result { + // TODO: This should not allocate and instead take a dst + let mut decoder = Decoder::new(); - // Max compression ration is >= 0.5 + // Max compression ratio is >= 0.5 let mut dst = BytesMut::with_capacity(src.len() << 1); for b in src { @@ -40,6 +42,66 @@ pub fn decode(src: &[u8]) -> Result { Ok(dst) } +// To avoid panics, the destination buffer must have src.len() remaining +// capacity. +pub fn encode(src: &[u8], dst: &mut B) { + let mut bits: u64 = 0; + let mut bits_left = 40; + + for &b in src { + let (nbits, code) = ENCODE_TABLE[b as usize]; + + bits |= code << (bits_left - nbits); + bits_left -= nbits; + + while (bits_left <= 32) { + dst.put_u8((bits >> 32) as u8); + bits <<= 8; + bits_left += 8; + } + } + + if bits_left != 40 { + // This writes the EOS token + bits |= (1 << bits_left) - 1; + dst.put_u8((bits >> 32) as u8); + } +} + +/* +static size_t encode_huffman(uint8_t *_dst, const uint8_t *src, size_t len) +{ + uint8_t *dst = _dst, *dst_end = dst + len; + const uint8_t *src_end = src + len; + uint64_t bits = 0; + int bits_left = 40; + + while (src != src_end) { + const nghttp2_huff_sym *sym = huff_sym_table + *src++; + bits |= (uint64_t)sym->code << (bits_left - sym->nbits); + bits_left -= sym->nbits; + while (bits_left <= 32) { + *dst++ = bits >> 32; + bits <<= 8; + bits_left += 8; + if (dst == dst_end) { + return 0; + } + } + } + + if (bits_left != 40) { + bits |= ((uint64_t)1 << bits_left) - 1; + *dst++ = bits >> 32; + } + if (dst == dst_end) { + return 0; + } + + return dst - _dst; +} + */ + impl Decoder { fn new() -> Decoder { Decoder { @@ -98,4 +160,57 @@ mod test { assert_eq!("!0", decode(&[254, 1]).unwrap()); assert_eq!(" !", decode(&[0b01010011, 0b11111000]).unwrap()); } + + #[test] + fn encode_single_byte() { + let mut dst = Vec::with_capacity(1); + + encode(b"o", &mut dst); + assert_eq!(&dst[..], &[0b00111111]); + + dst.clear(); + encode(b"0", &mut dst); + assert_eq!(&dst[..], &[0x0 + 7]); + + dst.clear(); + encode(b"A", &mut dst); + assert_eq!(&dst[..], &[(0x21 << 2) + 3]); + } + + #[test] + fn encode_decode_str() { + const DATA: &'static [&'static str] = &[ + "hello world", ":method", ":scheme", ":authority", "yahoo.co.jp", "GET", "http", ":path", "/images/top/sp2/cmn/logo-ns-130528.png", + "example.com", "hpack-test", "xxxxxxx1", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:16.0) Gecko/20100101 Firefox/16.0", + "accept", "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "cookie", "B=76j09a189a6h4&b=3&s=0b", + "TE", "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Morbi non bibendum libero. Etiam ultrices lorem ut", + ]; + + for s in DATA { + let mut dst = Vec::with_capacity(s.len()); + + encode(s.as_bytes(), &mut dst); + + let decoded = decode(&dst).unwrap(); + + assert_eq!(&decoded[..], s.as_bytes()); + } + } + + #[test] + fn encode_decode_u8() { + const DATA: &'static [&'static [u8]] = &[ + b"\0", b"\0\0\0", b"\0\x01\x02\x03\x04\x05", b"\xFF\xF8", + ]; + + for s in DATA { + let mut dst = Vec::with_capacity(s.len()); + + encode(s, &mut dst); + + let decoded = decode(&dst).unwrap(); + + assert_eq!(&decoded[..], &s[..]); + } + } } diff --git a/src/hpack/huffman/table.rs b/src/hpack/huffman/table.rs index 42ca0e1..8fe1a7a 100644 --- a/src/hpack/huffman/table.rs +++ b/src/hpack/huffman/table.rs @@ -1,4 +1,265 @@ -// !!! DO NOT EDIT !!! Generated by src/bin/genhuff.rs +// !!! DO NOT EDIT !!! Generated by util/genhuff/src/main.rs + +// (num-bits, bits) +pub const ENCODE_TABLE: [(usize, u64); 257] = [ + (13, 0x1ff8), + (23, 0x7fffd8), + (28, 0xfffffe2), + (28, 0xfffffe3), + (28, 0xfffffe4), + (28, 0xfffffe5), + (28, 0xfffffe6), + (28, 0xfffffe7), + (28, 0xfffffe8), + (24, 0xffffea), + (30, 0x3ffffffc), + (28, 0xfffffe9), + (28, 0xfffffea), + (30, 0x3ffffffd), + (28, 0xfffffeb), + (28, 0xfffffec), + (28, 0xfffffed), + (28, 0xfffffee), + (28, 0xfffffef), + (28, 0xffffff0), + (28, 0xffffff1), + (28, 0xffffff2), + (30, 0x3ffffffe), + (28, 0xffffff3), + (28, 0xffffff4), + (28, 0xffffff5), + (28, 0xffffff6), + (28, 0xffffff7), + (28, 0xffffff8), + (28, 0xffffff9), + (28, 0xffffffa), + (28, 0xffffffb), + (6, 0x14), + (10, 0x3f8), + (10, 0x3f9), + (12, 0xffa), + (13, 0x1ff9), + (6, 0x15), + (8, 0xf8), + (11, 0x7fa), + (10, 0x3fa), + (10, 0x3fb), + (8, 0xf9), + (11, 0x7fb), + (8, 0xfa), + (6, 0x16), + (6, 0x17), + (6, 0x18), + (5, 0x0), + (5, 0x1), + (5, 0x2), + (6, 0x19), + (6, 0x1a), + (6, 0x1b), + (6, 0x1c), + (6, 0x1d), + (6, 0x1e), + (6, 0x1f), + (7, 0x5c), + (8, 0xfb), + (15, 0x7ffc), + (6, 0x20), + (12, 0xffb), + (10, 0x3fc), + (13, 0x1ffa), + (6, 0x21), + (7, 0x5d), + (7, 0x5e), + (7, 0x5f), + (7, 0x60), + (7, 0x61), + (7, 0x62), + (7, 0x63), + (7, 0x64), + (7, 0x65), + (7, 0x66), + (7, 0x67), + (7, 0x68), + (7, 0x69), + (7, 0x6a), + (7, 0x6b), + (7, 0x6c), + (7, 0x6d), + (7, 0x6e), + (7, 0x6f), + (7, 0x70), + (7, 0x71), + (7, 0x72), + (8, 0xfc), + (7, 0x73), + (8, 0xfd), + (13, 0x1ffb), + (19, 0x7fff0), + (13, 0x1ffc), + (14, 0x3ffc), + (6, 0x22), + (15, 0x7ffd), + (5, 0x3), + (6, 0x23), + (5, 0x4), + (6, 0x24), + (5, 0x5), + (6, 0x25), + (6, 0x26), + (6, 0x27), + (5, 0x6), + (7, 0x74), + (7, 0x75), + (6, 0x28), + (6, 0x29), + (6, 0x2a), + (5, 0x7), + (6, 0x2b), + (7, 0x76), + (6, 0x2c), + (5, 0x8), + (5, 0x9), + (6, 0x2d), + (7, 0x77), + (7, 0x78), + (7, 0x79), + (7, 0x7a), + (7, 0x7b), + (15, 0x7ffe), + (11, 0x7fc), + (14, 0x3ffd), + (13, 0x1ffd), + (28, 0xffffffc), + (20, 0xfffe6), + (22, 0x3fffd2), + (20, 0xfffe7), + (20, 0xfffe8), + (22, 0x3fffd3), + (22, 0x3fffd4), + (22, 0x3fffd5), + (23, 0x7fffd9), + (22, 0x3fffd6), + (23, 0x7fffda), + (23, 0x7fffdb), + (23, 0x7fffdc), + (23, 0x7fffdd), + (23, 0x7fffde), + (24, 0xffffeb), + (23, 0x7fffdf), + (24, 0xffffec), + (24, 0xffffed), + (22, 0x3fffd7), + (23, 0x7fffe0), + (24, 0xffffee), + (23, 0x7fffe1), + (23, 0x7fffe2), + (23, 0x7fffe3), + (23, 0x7fffe4), + (21, 0x1fffdc), + (22, 0x3fffd8), + (23, 0x7fffe5), + (22, 0x3fffd9), + (23, 0x7fffe6), + (23, 0x7fffe7), + (24, 0xffffef), + (22, 0x3fffda), + (21, 0x1fffdd), + (20, 0xfffe9), + (22, 0x3fffdb), + (22, 0x3fffdc), + (23, 0x7fffe8), + (23, 0x7fffe9), + (21, 0x1fffde), + (23, 0x7fffea), + (22, 0x3fffdd), + (22, 0x3fffde), + (24, 0xfffff0), + (21, 0x1fffdf), + (22, 0x3fffdf), + (23, 0x7fffeb), + (23, 0x7fffec), + (21, 0x1fffe0), + (21, 0x1fffe1), + (22, 0x3fffe0), + (21, 0x1fffe2), + (23, 0x7fffed), + (22, 0x3fffe1), + (23, 0x7fffee), + (23, 0x7fffef), + (20, 0xfffea), + (22, 0x3fffe2), + (22, 0x3fffe3), + (22, 0x3fffe4), + (23, 0x7ffff0), + (22, 0x3fffe5), + (22, 0x3fffe6), + (23, 0x7ffff1), + (26, 0x3ffffe0), + (26, 0x3ffffe1), + (20, 0xfffeb), + (19, 0x7fff1), + (22, 0x3fffe7), + (23, 0x7ffff2), + (22, 0x3fffe8), + (25, 0x1ffffec), + (26, 0x3ffffe2), + (26, 0x3ffffe3), + (26, 0x3ffffe4), + (27, 0x7ffffde), + (27, 0x7ffffdf), + (26, 0x3ffffe5), + (24, 0xfffff1), + (25, 0x1ffffed), + (19, 0x7fff2), + (21, 0x1fffe3), + (26, 0x3ffffe6), + (27, 0x7ffffe0), + (27, 0x7ffffe1), + (26, 0x3ffffe7), + (27, 0x7ffffe2), + (24, 0xfffff2), + (21, 0x1fffe4), + (21, 0x1fffe5), + (26, 0x3ffffe8), + (26, 0x3ffffe9), + (28, 0xffffffd), + (27, 0x7ffffe3), + (27, 0x7ffffe4), + (27, 0x7ffffe5), + (20, 0xfffec), + (24, 0xfffff3), + (20, 0xfffed), + (21, 0x1fffe6), + (22, 0x3fffe9), + (21, 0x1fffe7), + (21, 0x1fffe8), + (23, 0x7ffff3), + (22, 0x3fffea), + (22, 0x3fffeb), + (25, 0x1ffffee), + (25, 0x1ffffef), + (24, 0xfffff4), + (24, 0xfffff5), + (26, 0x3ffffea), + (23, 0x7ffff4), + (26, 0x3ffffeb), + (27, 0x7ffffe6), + (26, 0x3ffffec), + (26, 0x3ffffed), + (27, 0x7ffffe7), + (27, 0x7ffffe8), + (27, 0x7ffffe9), + (27, 0x7ffffea), + (27, 0x7ffffeb), + (28, 0xffffffe), + (27, 0x7ffffec), + (27, 0x7ffffed), + (27, 0x7ffffee), + (27, 0x7ffffef), + (27, 0x7fffff0), + (26, 0x3ffffee), + (30, 0x3fffffff), +]; // (next-state, byte, flags) pub const DECODE_TABLE: [[(usize, u8, u8); 16]; 256] = [ diff --git a/util/genhuff/Cargo.toml b/util/genhuff/Cargo.toml new file mode 100644 index 0000000..59887ed --- /dev/null +++ b/util/genhuff/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "genhuff" +version = "0.1.0" +authors = ["Carl Lerche "] + +[dependencies] diff --git a/src/bin/genhuff.rs b/util/genhuff/src/main.rs similarity index 97% rename from src/bin/genhuff.rs rename to util/genhuff/src/main.rs index d71ceb1..30b6845 100644 --- a/src/bin/genhuff.rs +++ b/util/genhuff/src/main.rs @@ -189,10 +189,12 @@ impl Node { } /// Returns root of tree -fn load_table() -> Box { +fn load_table() -> (Vec<(usize, String)>, Box) { let mut lines = TABLE.lines(); let mut root: Option> = None; + let mut encode = vec![]; + // Skip the first line, which is empty lines.next(); @@ -208,6 +210,10 @@ fn load_table() -> Box { } } + let hex = line[50..59].trim(); + + encode.push((bits.len(), hex.to_string())); + match root { Some(ref mut node) => { node.insert(i, &bits); @@ -226,19 +232,27 @@ fn load_table() -> Box { // Compute transitions for each node root.compute_transitions(&root); - root + (encode, root) } pub fn main() { - let table = load_table(); + let (encode, decode) = load_table(); - println!("// !!! DO NOT EDIT !!! Generated by src/bin/genhuff.rs"); + println!("// !!! DO NOT EDIT !!! Generated by util/genhuff/src/main.rs"); println!(""); + println!("// (num-bits, bits)"); + println!("pub const ENCODE_TABLE: [(usize, u64); 257] = ["); + for (nbits, val) in encode { + println!(" ({}, 0x{}),", nbits, val); + } + println!("];"); + + println!(""); println!("// (next-state, byte, flags)"); println!("pub const DECODE_TABLE: [[(usize, u8, u8); 16]; 256] = ["); - table.print(); + decode.print(); println!("];"); }