Follow cURL's rules for parsing and matching NO_PROXY (#1332)
There are a few ways in which reqwest's handling of NO_PROXY differs from cURL (and other implementations). The biggest issue is that whitespace between entries should be ignored/trimmed, but is not (i.e. "NO_PROXY='a, b'" would never match "b"). In addition, according to cURL's rules, a NO_PROXY entry without a leading dot should match the domain itself as well as any subdomains (reqwest only handles exact matches if there is no leading dot) and entries with a leading dot should only match subdomains (but request allows exact matches). Finally, cURL allows a special entry "*" to match all entries (effectively disabling use of the proxy).
This commit is contained in:
		
							
								
								
									
										123
									
								
								src/proxy.rs
									
									
									
									
									
								
							
							
						
						
									
										123
									
								
								src/proxy.rs
									
									
									
									
									
								
							| @@ -205,7 +205,7 @@ impl Proxy { | |||||||
|         ))) |         ))) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Provide a custom function to determine what traffix to proxy to where. |     /// Provide a custom function to determine what traffic to proxy to where. | ||||||
|     /// |     /// | ||||||
|     /// # Example |     /// # Example | ||||||
|     /// |     /// | ||||||
| @@ -366,8 +366,26 @@ impl fmt::Debug for Proxy { | |||||||
| } | } | ||||||
|  |  | ||||||
| impl NoProxy { | impl NoProxy { | ||||||
|     /// Returns a new no proxy configration if the NO_PROXY/no_proxy environment variable is set. |     /// Returns a new no-proxy configuration based on environment variables (or `None` if no variables are set) | ||||||
|     /// Returns None otherwise |     /// | ||||||
|  |     /// The rules are as follows: | ||||||
|  |     /// * The environment variable `NO_PROXY` is checked, if it is not set, `no_proxy` is checked | ||||||
|  |     /// * If neither environment variable is set, `None` is returned | ||||||
|  |     /// * Entries are expected to be comma-separated (whitespace between entries is ignored) | ||||||
|  |     /// * IP addresses (both IPv4 and IPv6) are allowed, as are optional subnet masks (by adding /size, | ||||||
|  |     /// for example "`192.168.1.0/24`"). | ||||||
|  |     /// * An entry "`*`" matches all hostnames (this is the only wildcard allowed) | ||||||
|  |     /// * Any other entry is considered a domain name (and may contain a leading dot, for example `google.com` | ||||||
|  |     /// and `.google.com` are equivalent) and would match both that domain AND all subdomains. | ||||||
|  |     /// | ||||||
|  |     /// For example, if `"NO_PROXY=google.com, 192.168.1.0/24"` was set, all of the following would match | ||||||
|  |     /// (and therefore would bypass the proxy): | ||||||
|  |     /// * `http://google.com/` | ||||||
|  |     /// * `http://www.google.com/` | ||||||
|  |     /// * `http://192.168.1.42/` | ||||||
|  |     /// | ||||||
|  |     /// The URL `http://notgoogle.com/` would not match. | ||||||
|  |     /// | ||||||
|     fn new() -> Option<Self> { |     fn new() -> Option<Self> { | ||||||
|         let raw = env::var("NO_PROXY") |         let raw = env::var("NO_PROXY") | ||||||
|             .or_else(|_| env::var("no_proxy")) |             .or_else(|_| env::var("no_proxy")) | ||||||
| @@ -377,7 +395,7 @@ impl NoProxy { | |||||||
|         } |         } | ||||||
|         let mut ips = Vec::new(); |         let mut ips = Vec::new(); | ||||||
|         let mut domains = Vec::new(); |         let mut domains = Vec::new(); | ||||||
|         let parts = raw.split(','); |         let parts = raw.split(',').map(str::trim); | ||||||
|         for part in parts { |         for part in parts { | ||||||
|             match part.parse::<IpNet>() { |             match part.parse::<IpNet>() { | ||||||
|                 // If we can parse an IP net or address, then use it, otherwise, assume it is a domain |                 // If we can parse an IP net or address, then use it, otherwise, assume it is a domain | ||||||
| @@ -432,13 +450,25 @@ impl IpMatcher { | |||||||
| } | } | ||||||
|  |  | ||||||
| impl DomainMatcher { | impl DomainMatcher { | ||||||
|  |     // The following links may be useful to understand the origin of these rules: | ||||||
|  |     // * https://curl.se/libcurl/c/CURLOPT_NOPROXY.html | ||||||
|  |     // * https://github.com/curl/curl/issues/1208 | ||||||
|     fn contains(&self, domain: &str) -> bool { |     fn contains(&self, domain: &str) -> bool { | ||||||
|  |         let domain_len = domain.len(); | ||||||
|         for d in self.0.iter() { |         for d in self.0.iter() { | ||||||
|             // First check for a "wildcard" domain match. A single "." will match anything. |             if d == domain || (d.starts_with('.') && &d[1..] == domain) { | ||||||
|             // Otherwise, check that the domains are equal |                 return true; | ||||||
|             if (d.starts_with('.') && domain.ends_with(d.get(1..).unwrap_or_default())) |             } else if domain.ends_with(d) { | ||||||
|                 || d == domain |                 if d.starts_with('.') { | ||||||
|             { |                     // If the first character of d is a dot, that means the first character of domain | ||||||
|  |                     // must also be a dot, so we are looking at a subdomain of d and that matches | ||||||
|  |                     return true; | ||||||
|  |                 } else if domain.as_bytes()[domain_len - d.len() - 1] == b'.' { | ||||||
|  |                     // Given that d is a prefix of domain, if the prior character in domain is a dot | ||||||
|  |                     // then that means we must be matching a subdomain of d, and that matches | ||||||
|  |                     return true; | ||||||
|  |                 } | ||||||
|  |             } else if d == "*" { | ||||||
|                 return true; |                 return true; | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
| @@ -1183,18 +1213,38 @@ mod tests { | |||||||
|         let mut p = Proxy::new(Intercept::System(Arc::new(get_sys_proxies(None)))); |         let mut p = Proxy::new(Intercept::System(Arc::new(get_sys_proxies(None)))); | ||||||
|         p.no_proxy = NoProxy::new(); |         p.no_proxy = NoProxy::new(); | ||||||
|  |  | ||||||
|  |         // random url, not in no_proxy | ||||||
|         assert_eq!(intercepted_uri(&p, "http://hyper.rs"), target); |         assert_eq!(intercepted_uri(&p, "http://hyper.rs"), target); | ||||||
|         assert_eq!(intercepted_uri(&p, "http://foo.bar.baz"), target); |         // make sure that random non-subdomain string prefixes don't match | ||||||
|  |         assert_eq!(intercepted_uri(&p, "http://notfoo.bar"), target); | ||||||
|  |         // make sure that random non-subdomain string prefixes don't match | ||||||
|  |         assert_eq!(intercepted_uri(&p, "http://notbar.baz"), target); | ||||||
|  |         // ipv4 address out of range | ||||||
|         assert_eq!(intercepted_uri(&p, "http://10.43.1.1"), target); |         assert_eq!(intercepted_uri(&p, "http://10.43.1.1"), target); | ||||||
|  |         // ipv4 address out of range | ||||||
|         assert_eq!(intercepted_uri(&p, "http://10.124.7.7"), target); |         assert_eq!(intercepted_uri(&p, "http://10.124.7.7"), target); | ||||||
|  |         // ipv6 address out of range | ||||||
|         assert_eq!(intercepted_uri(&p, "http://[ffff:db8:a0b:12f0::1]"), target); |         assert_eq!(intercepted_uri(&p, "http://[ffff:db8:a0b:12f0::1]"), target); | ||||||
|  |         // ipv6 address out of range | ||||||
|         assert_eq!(intercepted_uri(&p, "http://[2005:db8:a0b:12f0::1]"), target); |         assert_eq!(intercepted_uri(&p, "http://[2005:db8:a0b:12f0::1]"), target); | ||||||
|  |  | ||||||
|  |         // make sure subdomains (with leading .) match | ||||||
|         assert!(p.intercept(&url("http://hello.foo.bar")).is_none()); |         assert!(p.intercept(&url("http://hello.foo.bar")).is_none()); | ||||||
|  |         // make sure exact matches (without leading .) match (also makes sure spaces between entries work) | ||||||
|         assert!(p.intercept(&url("http://bar.baz")).is_none()); |         assert!(p.intercept(&url("http://bar.baz")).is_none()); | ||||||
|  |         // check case sensitivity | ||||||
|  |         assert!(p.intercept(&url("http://BAR.baz")).is_none()); | ||||||
|  |         // make sure subdomains (without leading . in no_proxy) match | ||||||
|  |         assert!(p.intercept(&url("http://foo.bar.baz")).is_none()); | ||||||
|  |         // make sure subdomains (without leading . in no_proxy) match - this differs from cURL | ||||||
|  |         assert!(p.intercept(&url("http://foo.bar")).is_none()); | ||||||
|  |         // ipv4 address match within range | ||||||
|         assert!(p.intercept(&url("http://10.42.1.100")).is_none()); |         assert!(p.intercept(&url("http://10.42.1.100")).is_none()); | ||||||
|  |         // ipv6 address exact match | ||||||
|         assert!(p.intercept(&url("http://[::1]")).is_none()); |         assert!(p.intercept(&url("http://[::1]")).is_none()); | ||||||
|  |         // ipv6 address match within range | ||||||
|         assert!(p.intercept(&url("http://[2001:db8:a0b:12f0::1]")).is_none()); |         assert!(p.intercept(&url("http://[2001:db8:a0b:12f0::1]")).is_none()); | ||||||
|  |         // ipv4 address exact match | ||||||
|         assert!(p.intercept(&url("http://10.124.7.8")).is_none()); |         assert!(p.intercept(&url("http://10.124.7.8")).is_none()); | ||||||
|  |  | ||||||
|         // reset user setting when guards drop |         // reset user setting when guards drop | ||||||
| @@ -1204,6 +1254,59 @@ mod tests { | |||||||
|         drop(_lock); |         drop(_lock); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn test_wildcard_sys_no_proxy() { | ||||||
|  |         // Stop other threads from modifying process-global ENV while we are. | ||||||
|  |         let _lock = ENVLOCK.lock(); | ||||||
|  |         // save system setting first. | ||||||
|  |         let _g1 = env_guard("HTTP_PROXY"); | ||||||
|  |         let _g2 = env_guard("NO_PROXY"); | ||||||
|  |  | ||||||
|  |         let target = "http://example.domain/"; | ||||||
|  |         env::set_var("HTTP_PROXY", target); | ||||||
|  |  | ||||||
|  |         env::set_var("NO_PROXY", "*"); | ||||||
|  |  | ||||||
|  |         // Manually construct this so we aren't use the cache | ||||||
|  |         let mut p = Proxy::new(Intercept::System(Arc::new(get_sys_proxies(None)))); | ||||||
|  |         p.no_proxy = NoProxy::new(); | ||||||
|  |  | ||||||
|  |         assert!(p.intercept(&url("http://foo.bar")).is_none()); | ||||||
|  |  | ||||||
|  |         // reset user setting when guards drop | ||||||
|  |         drop(_g1); | ||||||
|  |         drop(_g2); | ||||||
|  |         // Let other threads run now | ||||||
|  |         drop(_lock); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn test_empty_sys_no_proxy() { | ||||||
|  |         // Stop other threads from modifying process-global ENV while we are. | ||||||
|  |         let _lock = ENVLOCK.lock(); | ||||||
|  |         // save system setting first. | ||||||
|  |         let _g1 = env_guard("HTTP_PROXY"); | ||||||
|  |         let _g2 = env_guard("NO_PROXY"); | ||||||
|  |  | ||||||
|  |         let target = "http://example.domain/"; | ||||||
|  |         env::set_var("HTTP_PROXY", target); | ||||||
|  |  | ||||||
|  |         env::set_var("NO_PROXY", ","); | ||||||
|  |  | ||||||
|  |         // Manually construct this so we aren't use the cache | ||||||
|  |         let mut p = Proxy::new(Intercept::System(Arc::new(get_sys_proxies(None)))); | ||||||
|  |         p.no_proxy = NoProxy::new(); | ||||||
|  |  | ||||||
|  |         // everything should go through proxy, "effectively" nothing is in no_proxy | ||||||
|  |         assert_eq!(intercepted_uri(&p, "http://hyper.rs"), target); | ||||||
|  |  | ||||||
|  |         // reset user setting when guards drop | ||||||
|  |         drop(_g1); | ||||||
|  |         drop(_g2); | ||||||
|  |         // Let other threads run now | ||||||
|  |         drop(_lock); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn test_no_proxy_load() { |     fn test_no_proxy_load() { | ||||||
|         // Stop other threads from modifying process-global ENV while we are. |         // Stop other threads from modifying process-global ENV while we are. | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user