From b6a10e5c1e8f3181e9c7a30d907ae4c27f33c271 Mon Sep 17 00:00:00 2001
From: Marko Lalic <marko.lalic@gmail.com>
Date: Tue, 6 Jan 2015 20:28:36 +0100
Subject: [PATCH] Fix chunk size parsing: handle invalid chunk sizes

Currently, when the chunk size was an invalid hex number (i.e.
contained a non-HEX octet), hyper would halt its parsing at the
last valid hex digit and report this as the chunk size.

For example, the following would be the chunk sizes reported for
some strings:

    "X" => 0
    "1X" => 1
    "aY" => 10
    "a;a" => 10 (only this should be correct!)

This fix makes it so that any chunk size, which cannot be parsed
as a valid hex number, causes an IoError. Additionally, the case
where the chunk size is followed by a chunk extension is also
handled (since the ";" token is a valid delimiter between the
chunk size and, as such, along with a LWS octet, represents a
valid terminal character of the chunk size).

Regression tests for the `get_chunk_size` function and included.
---
 src/http.rs | 69 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 63 insertions(+), 6 deletions(-)
diff --git a/src/http.rs b/src/http.rs
index d0c63e3a..fb1cf0f7 100644
--- a/src/http.rs
+++ b/src/http.rs
@@ -137,17 +137,18 @@ fn read_chunk_size<R: Reader>(rdr: &mut R) -> IoResult<uint> {
     let mut size = 0u;
     let radix = 16;
     let mut in_ext = false;
+    let mut in_chunk_size = true;
     loop {
         match try!(rdr.read_byte()) {
-            b@b'0'...b'9' if !in_ext => {
+            b@b'0'...b'9' if in_chunk_size => {
                 size *= radix;
                 size += (b - b'0') as uint;
             },
-            b@b'a'...b'f' if !in_ext => {
+            b@b'a'...b'f' if in_chunk_size => {
                 size *= radix;
                 size += (b + 10 - b'a') as uint;
             },
-            b@b'A'...b'F' if !in_ext => {
+            b@b'A'...b'F' if in_chunk_size => {
                 size *= radix;
                 size += (b + 10 - b'A') as uint;
             },
@@ -157,9 +158,28 @@ fn read_chunk_size<R: Reader>(rdr: &mut R) -> IoResult<uint> {
                     _ => return Err(io::standard_error(io::InvalidInput))
                 }
             },
-            ext => {
+            // If we weren't in the extension yet, the ";" signals its start
+            b';' if !in_ext => {
                 in_ext = true;
+                in_chunk_size = false;
+            },
+            // "Linear white space" is ignored between the chunk size and the
+            // extension separator token (";") due to the "implied *LWS rule".
+            b'\t' | b' ' if !in_ext & !in_chunk_size => {},
+            // LWS can follow the chunk size, but no more digits can come
+            b'\t' | b' ' if in_chunk_size => in_chunk_size = false,
+            // We allow any arbitrary octet once we are in the extension, since
+            // they all get ignored anyway. According to the HTTP spec, valid
+            // extensions would have a more strict syntax:
+            //     (token ["=" (token | quoted-string)])
+            // but we gain nothing by rejecting an otherwise valid chunk size.
+            ext if in_ext => {
                 todo!("chunk extension byte={}", ext);
+            },
+            // Finally, if we aren't in the extension and we're reading any
+            // other octet, the chunk size line is invalid!
+            _ => {
+                return Err(io::standard_error(io::InvalidInput));
             }
         }
     }
@@ -689,7 +709,7 @@ fn expect(r: IoResult<u8>, expected: u8) -> HttpResult<()> {
 
 #[cfg(test)]
 mod tests {
-    use std::io::{self, MemReader, MemWriter};
+    use std::io::{self, MemReader, MemWriter, IoResult};
     use std::borrow::Cow::{Borrowed, Owned};
     use test::Bencher;
     use uri::RequestUri;
@@ -702,7 +722,7 @@ mod tests {
     use url::Url;
 
     use super::{read_method, read_uri, read_http_version, read_header,
-                RawHeaderLine, read_status, RawStatus};
+                RawHeaderLine, read_status, RawStatus, read_chunk_size};
 
     fn mem(s: &str) -> MemReader {
         MemReader::new(s.as_bytes().to_vec())
@@ -811,6 +831,43 @@ mod tests {
         assert_eq!(s, "foo barb");
     }
 
+    #[test]
+    fn test_read_chunk_size() {
+        fn read(s: &str, result: IoResult<uint>) {
+            assert_eq!(read_chunk_size(&mut mem(s)), result);
+        }
+
+        read("1\r\n", Ok(1));
+        read("01\r\n", Ok(1));
+        read("0\r\n", Ok(0));
+        read("00\r\n", Ok(0));
+        read("A\r\n", Ok(10));
+        read("a\r\n", Ok(10));
+        read("Ff\r\n", Ok(255));
+        read("Ff   \r\n", Ok(255));
+        // Missing LF or CRLF
+        read("F\rF", Err(io::standard_error(io::InvalidInput)));
+        read("F", Err(io::standard_error(io::EndOfFile)));
+        // Invalid hex digit
+        read("X\r\n", Err(io::standard_error(io::InvalidInput)));
+        read("1X\r\n", Err(io::standard_error(io::InvalidInput)));
+        read("-\r\n", Err(io::standard_error(io::InvalidInput)));
+        read("-1\r\n", Err(io::standard_error(io::InvalidInput)));
+        // Acceptable (if not fully valid) extensions do not influence the size
+        read("1;extension\r\n", Ok(1));
+        read("a;ext name=value\r\n", Ok(10));
+        read("1;extension;extension2\r\n", Ok(1));
+        read("1;;;  ;\r\n", Ok(1));
+        read("2; extension...\r\n", Ok(2));
+        read("3   ; extension=123\r\n", Ok(3));
+        read("3   ;\r\n", Ok(3));
+        read("3   ;   \r\n", Ok(3));
+        // Invalid extensions cause an error
+        read("1 invalid extension\r\n", Err(io::standard_error(io::InvalidInput)));
+        read("1 A\r\n", Err(io::standard_error(io::InvalidInput)));
+        read("1;no CRLF", Err(io::standard_error(io::EndOfFile)));
+    }
+
     #[bench]
     fn bench_read_method(b: &mut Bencher) {
         b.bytes = b"CONNECT ".len() as u64;