Follow cURL's rules for parsing and matching NO_PROXY (#1332)

There are a few ways in which reqwest's handling of NO_PROXY differs from cURL (and other implementations). The biggest issue is that whitespace between entries should be ignored/trimmed, but is not (i.e. "NO_PROXY='a, b'" would never match "b"). In addition, according to cURL's rules, a NO_PROXY entry without a leading dot should match the domain itself as well as any subdomains (reqwest only handles exact matches if there is no leading dot) and entries with a leading dot should only match subdomains (but request allows exact matches). Finally, cURL allows a special entry "*" to match all entries (effectively disabling use of the proxy).
This commit is contained in:
Adam Batkin
2021-10-07 14:39:31 -04:00
committed by GitHub
parent 6d682b53e2
commit 203cd5bfdc

View File

@@ -205,7 +205,7 @@ impl Proxy {
))) )))
} }
/// Provide a custom function to determine what traffix to proxy to where. /// Provide a custom function to determine what traffic to proxy to where.
/// ///
/// # Example /// # Example
/// ///
@@ -366,8 +366,26 @@ impl fmt::Debug for Proxy {
} }
impl NoProxy { impl NoProxy {
/// Returns a new no proxy configration if the NO_PROXY/no_proxy environment variable is set. /// Returns a new no-proxy configuration based on environment variables (or `None` if no variables are set)
/// Returns None otherwise ///
/// The rules are as follows:
/// * The environment variable `NO_PROXY` is checked, if it is not set, `no_proxy` is checked
/// * If neither environment variable is set, `None` is returned
/// * Entries are expected to be comma-separated (whitespace between entries is ignored)
/// * IP addresses (both IPv4 and IPv6) are allowed, as are optional subnet masks (by adding /size,
/// for example "`192.168.1.0/24`").
/// * An entry "`*`" matches all hostnames (this is the only wildcard allowed)
/// * Any other entry is considered a domain name (and may contain a leading dot, for example `google.com`
/// and `.google.com` are equivalent) and would match both that domain AND all subdomains.
///
/// For example, if `"NO_PROXY=google.com, 192.168.1.0/24"` was set, all of the following would match
/// (and therefore would bypass the proxy):
/// * `http://google.com/`
/// * `http://www.google.com/`
/// * `http://192.168.1.42/`
///
/// The URL `http://notgoogle.com/` would not match.
///
fn new() -> Option<Self> { fn new() -> Option<Self> {
let raw = env::var("NO_PROXY") let raw = env::var("NO_PROXY")
.or_else(|_| env::var("no_proxy")) .or_else(|_| env::var("no_proxy"))
@@ -377,7 +395,7 @@ impl NoProxy {
} }
let mut ips = Vec::new(); let mut ips = Vec::new();
let mut domains = Vec::new(); let mut domains = Vec::new();
let parts = raw.split(','); let parts = raw.split(',').map(str::trim);
for part in parts { for part in parts {
match part.parse::<IpNet>() { match part.parse::<IpNet>() {
// If we can parse an IP net or address, then use it, otherwise, assume it is a domain // If we can parse an IP net or address, then use it, otherwise, assume it is a domain
@@ -432,13 +450,25 @@ impl IpMatcher {
} }
impl DomainMatcher { impl DomainMatcher {
// The following links may be useful to understand the origin of these rules:
// * https://curl.se/libcurl/c/CURLOPT_NOPROXY.html
// * https://github.com/curl/curl/issues/1208
fn contains(&self, domain: &str) -> bool { fn contains(&self, domain: &str) -> bool {
let domain_len = domain.len();
for d in self.0.iter() { for d in self.0.iter() {
// First check for a "wildcard" domain match. A single "." will match anything. if d == domain || (d.starts_with('.') && &d[1..] == domain) {
// Otherwise, check that the domains are equal return true;
if (d.starts_with('.') && domain.ends_with(d.get(1..).unwrap_or_default())) } else if domain.ends_with(d) {
|| d == domain if d.starts_with('.') {
{ // If the first character of d is a dot, that means the first character of domain
// must also be a dot, so we are looking at a subdomain of d and that matches
return true;
} else if domain.as_bytes()[domain_len - d.len() - 1] == b'.' {
// Given that d is a prefix of domain, if the prior character in domain is a dot
// then that means we must be matching a subdomain of d, and that matches
return true;
}
} else if d == "*" {
return true; return true;
} }
} }
@@ -1176,25 +1206,45 @@ mod tests {
env::set_var( env::set_var(
"NO_PROXY", "NO_PROXY",
".foo.bar,bar.baz,10.42.1.1/24,::1,10.124.7.8,2001::/17", ".foo.bar, bar.baz,10.42.1.1/24,::1,10.124.7.8,2001::/17",
); );
// Manually construct this so we aren't use the cache // Manually construct this so we aren't use the cache
let mut p = Proxy::new(Intercept::System(Arc::new(get_sys_proxies(None)))); let mut p = Proxy::new(Intercept::System(Arc::new(get_sys_proxies(None))));
p.no_proxy = NoProxy::new(); p.no_proxy = NoProxy::new();
// random url, not in no_proxy
assert_eq!(intercepted_uri(&p, "http://hyper.rs"), target); assert_eq!(intercepted_uri(&p, "http://hyper.rs"), target);
assert_eq!(intercepted_uri(&p, "http://foo.bar.baz"), target); // make sure that random non-subdomain string prefixes don't match
assert_eq!(intercepted_uri(&p, "http://notfoo.bar"), target);
// make sure that random non-subdomain string prefixes don't match
assert_eq!(intercepted_uri(&p, "http://notbar.baz"), target);
// ipv4 address out of range
assert_eq!(intercepted_uri(&p, "http://10.43.1.1"), target); assert_eq!(intercepted_uri(&p, "http://10.43.1.1"), target);
// ipv4 address out of range
assert_eq!(intercepted_uri(&p, "http://10.124.7.7"), target); assert_eq!(intercepted_uri(&p, "http://10.124.7.7"), target);
// ipv6 address out of range
assert_eq!(intercepted_uri(&p, "http://[ffff:db8:a0b:12f0::1]"), target); assert_eq!(intercepted_uri(&p, "http://[ffff:db8:a0b:12f0::1]"), target);
// ipv6 address out of range
assert_eq!(intercepted_uri(&p, "http://[2005:db8:a0b:12f0::1]"), target); assert_eq!(intercepted_uri(&p, "http://[2005:db8:a0b:12f0::1]"), target);
// make sure subdomains (with leading .) match
assert!(p.intercept(&url("http://hello.foo.bar")).is_none()); assert!(p.intercept(&url("http://hello.foo.bar")).is_none());
// make sure exact matches (without leading .) match (also makes sure spaces between entries work)
assert!(p.intercept(&url("http://bar.baz")).is_none()); assert!(p.intercept(&url("http://bar.baz")).is_none());
// check case sensitivity
assert!(p.intercept(&url("http://BAR.baz")).is_none());
// make sure subdomains (without leading . in no_proxy) match
assert!(p.intercept(&url("http://foo.bar.baz")).is_none());
// make sure subdomains (without leading . in no_proxy) match - this differs from cURL
assert!(p.intercept(&url("http://foo.bar")).is_none());
// ipv4 address match within range
assert!(p.intercept(&url("http://10.42.1.100")).is_none()); assert!(p.intercept(&url("http://10.42.1.100")).is_none());
// ipv6 address exact match
assert!(p.intercept(&url("http://[::1]")).is_none()); assert!(p.intercept(&url("http://[::1]")).is_none());
// ipv6 address match within range
assert!(p.intercept(&url("http://[2001:db8:a0b:12f0::1]")).is_none()); assert!(p.intercept(&url("http://[2001:db8:a0b:12f0::1]")).is_none());
// ipv4 address exact match
assert!(p.intercept(&url("http://10.124.7.8")).is_none()); assert!(p.intercept(&url("http://10.124.7.8")).is_none());
// reset user setting when guards drop // reset user setting when guards drop
@@ -1204,6 +1254,59 @@ mod tests {
drop(_lock); drop(_lock);
} }
#[test]
fn test_wildcard_sys_no_proxy() {
// Stop other threads from modifying process-global ENV while we are.
let _lock = ENVLOCK.lock();
// save system setting first.
let _g1 = env_guard("HTTP_PROXY");
let _g2 = env_guard("NO_PROXY");
let target = "http://example.domain/";
env::set_var("HTTP_PROXY", target);
env::set_var("NO_PROXY", "*");
// Manually construct this so we aren't use the cache
let mut p = Proxy::new(Intercept::System(Arc::new(get_sys_proxies(None))));
p.no_proxy = NoProxy::new();
assert!(p.intercept(&url("http://foo.bar")).is_none());
// reset user setting when guards drop
drop(_g1);
drop(_g2);
// Let other threads run now
drop(_lock);
}
#[test]
fn test_empty_sys_no_proxy() {
// Stop other threads from modifying process-global ENV while we are.
let _lock = ENVLOCK.lock();
// save system setting first.
let _g1 = env_guard("HTTP_PROXY");
let _g2 = env_guard("NO_PROXY");
let target = "http://example.domain/";
env::set_var("HTTP_PROXY", target);
env::set_var("NO_PROXY", ",");
// Manually construct this so we aren't use the cache
let mut p = Proxy::new(Intercept::System(Arc::new(get_sys_proxies(None))));
p.no_proxy = NoProxy::new();
// everything should go through proxy, "effectively" nothing is in no_proxy
assert_eq!(intercepted_uri(&p, "http://hyper.rs"), target);
// reset user setting when guards drop
drop(_g1);
drop(_g2);
// Let other threads run now
drop(_lock);
}
#[test] #[test]
fn test_no_proxy_load() { fn test_no_proxy_load() {
// Stop other threads from modifying process-global ENV while we are. // Stop other threads from modifying process-global ENV while we are.