http/uri/
authority.rs

1use std::convert::TryFrom;
2use std::hash::{Hash, Hasher};
3use std::str::FromStr;
4use std::{cmp, fmt, str};
5
6use bytes::Bytes;
7
8use super::{ErrorKind, InvalidUri, Port, URI_CHARS};
9use crate::byte_str::ByteStr;
10
/// Validation result for authority parsing.
///
/// Internal failure reasons produced by `validate_authority_bytes`.
/// `Authority::parse` folds them into the public `ErrorKind` values.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum AuthorityError {
    /// The input slice contained no bytes at all.
    Empty,
    /// A byte that is neither a valid URI character nor `%` was found.
    InvalidUriChar,
    /// More than one colon was left over after userinfo / bracketed host
    /// colons were discounted (e.g. `localhost:8080:3030`).
    InvalidAuthority,
    /// The running colon count would have exceeded the validator's limit.
    TooManyColons,
    /// A `[` was seen without a matching `]` by the end of the authority.
    MismatchedBrackets,
    /// A `[` followed a pending `%` or another `[`, or a `]` appeared
    /// without a preceding `[` or after another `]`.
    InvalidBracketUsage,
    /// The authority ends immediately after an `@`.
    EmptyAfterAt,
    /// A `%` was found outside of the userinfo and outside of a bracketed
    /// host.
    InvalidPercent,
}
23
/// Represents the authority component of a URI.
///
/// Comparisons (`PartialEq`, `PartialOrd`) and hashing implemented in this
/// file ignore ASCII case.
#[derive(Clone)]
pub struct Authority {
    // The authority text. `Authority::empty()` leaves it empty; the parsing
    // constructors in this file validate the bytes before storing them.
    pub(super) data: ByteStr,
}
29
30impl Authority {
31    pub(super) fn empty() -> Self {
32        Authority {
33            data: ByteStr::new(),
34        }
35    }
36
37    // Not public while `bytes` is unstable.
38    pub(super) fn from_shared(s: Bytes) -> Result<Self, InvalidUri> {
39        // Precondition on create_authority: trivially satisfied by the
40        // identity closure
41        create_authority(s, |s| s)
42    }
43
44    /// Attempt to convert an `Authority` from a static string.
45    ///
46    /// This function will not perform any copying, and the string will be
47    /// checked if it is empty or contains an invalid character.
48    ///
49    /// # Panics
50    ///
51    /// This function panics if the argument contains invalid characters or
52    /// is empty.
53    ///
54    /// # Examples
55    ///
56    /// ```
57    /// # use http::uri::Authority;
58    /// let authority = Authority::from_static("example.com");
59    /// assert_eq!(authority.host(), "example.com");
60    /// ```
61    #[inline]
62    pub const fn from_static(src: &'static str) -> Self {
63        match validate_authority_bytes(src.as_bytes()) {
64            Ok(_) => Authority {
65                data: ByteStr::from_static(src),
66            },
67            Err(_) => panic!("static str is not valid authority"),
68        }
69    }
70
71    /// Attempt to convert a `Bytes` buffer to a `Authority`.
72    ///
73    /// This will try to prevent a copy if the type passed is the type used
74    /// internally, and will copy the data if it is not.
75    pub fn from_maybe_shared<T>(src: T) -> Result<Self, InvalidUri>
76    where
77        T: AsRef<[u8]> + 'static,
78    {
79        if_downcast_into!(T, Bytes, src, {
80            return Authority::from_shared(src);
81        });
82
83        Authority::try_from(src.as_ref())
84    }
85
86    // Note: this may return an *empty* Authority. You might want `parse_non_empty`.
87    // Postcondition: for all Ok() returns, s[..ret.unwrap()] is valid UTF-8 where
88    // ret is the return value.
89    pub(super) fn parse(s: &[u8]) -> Result<usize, InvalidUri> {
90        validate_authority_bytes(s).map_err(|e| {
91            match e {
92                AuthorityError::Empty => ErrorKind::Empty,
93                AuthorityError::InvalidUriChar => ErrorKind::InvalidUriChar,
94                AuthorityError::InvalidAuthority
95                | AuthorityError::MismatchedBrackets
96                | AuthorityError::InvalidBracketUsage
97                | AuthorityError::EmptyAfterAt
98                | AuthorityError::InvalidPercent
99                | AuthorityError::TooManyColons => ErrorKind::InvalidAuthority,
100            }
101            .into()
102        })
103    }
104
105    // Parse bytes as an Authority, not allowing an empty string.
106    //
107    // This should be used by functions that allow a user to parse
108    // an `Authority` by itself.
109    //
110    // Postcondition: for all Ok() returns, s[..ret.unwrap()] is valid UTF-8 where
111    // ret is the return value.
112    fn parse_non_empty(s: &[u8]) -> Result<usize, InvalidUri> {
113        if s.is_empty() {
114            return Err(ErrorKind::Empty.into());
115        }
116        Authority::parse(s)
117    }
118
119    /// Get the host of this `Authority`.
120    ///
121    /// The host subcomponent of authority is identified by an IP literal
122    /// encapsulated within square brackets, an IPv4 address in dotted- decimal
123    /// form, or a registered name.  The host subcomponent is **case-insensitive**.
124    ///
125    /// ```notrust
126    /// abc://username:password@example.com:123/path/data?key=value&key2=value2#fragid1
127    ///                         |---------|
128    ///                              |
129    ///                             host
130    /// ```
131    ///
132    /// # Examples
133    ///
134    /// ```
135    /// # use http::uri::*;
136    /// let authority: Authority = "example.org:80".parse().unwrap();
137    ///
138    /// assert_eq!(authority.host(), "example.org");
139    /// ```
140    #[inline]
141    pub fn host(&self) -> &str {
142        host(self.as_str())
143    }
144
145    /// Get the port part of this `Authority`.
146    ///
147    /// The port subcomponent of authority is designated by an optional port
148    /// number following the host and delimited from it by a single colon (":")
149    /// character. It can be turned into a decimal port number with the `as_u16`
150    /// method or as a `str` with the `as_str` method.
151    ///
152    /// ```notrust
153    /// abc://username:password@example.com:123/path/data?key=value&key2=value2#fragid1
154    ///                                     |-|
155    ///                                      |
156    ///                                     port
157    /// ```
158    ///
159    /// # Examples
160    ///
161    /// Authority with port
162    ///
163    /// ```
164    /// # use http::uri::Authority;
165    /// let authority: Authority = "example.org:80".parse().unwrap();
166    ///
167    /// let port = authority.port().unwrap();
168    /// assert_eq!(port.as_u16(), 80);
169    /// assert_eq!(port.as_str(), "80");
170    /// ```
171    ///
172    /// Authority without port
173    ///
174    /// ```
175    /// # use http::uri::Authority;
176    /// let authority: Authority = "example.org".parse().unwrap();
177    ///
178    /// assert!(authority.port().is_none());
179    /// ```
180    pub fn port(&self) -> Option<Port<&str>> {
181        let bytes = self.as_str();
182        bytes
183            .rfind(':')
184            .and_then(|i| Port::from_str(&bytes[i + 1..]).ok())
185    }
186
187    /// Get the port of this `Authority` as a `u16`.
188    ///
189    /// # Example
190    ///
191    /// ```
192    /// # use http::uri::Authority;
193    /// let authority: Authority = "example.org:80".parse().unwrap();
194    ///
195    /// assert_eq!(authority.port_u16(), Some(80));
196    /// ```
197    pub fn port_u16(&self) -> Option<u16> {
198        self.port().map(|p| p.as_u16())
199    }
200
201    /// Return a str representation of the authority
202    #[inline]
203    pub fn as_str(&self) -> &str {
204        &self.data[..]
205    }
206}
207
208// Purposefully not public while `bytes` is unstable.
209// impl TryFrom<Bytes> for Authority
210
211impl AsRef<str> for Authority {
212    fn as_ref(&self) -> &str {
213        self.as_str()
214    }
215}
216
217impl PartialEq for Authority {
218    fn eq(&self, other: &Authority) -> bool {
219        self.data.eq_ignore_ascii_case(&other.data)
220    }
221}
222
223impl Eq for Authority {}
224
225/// Case-insensitive equality
226///
227/// # Examples
228///
229/// ```
230/// # use http::uri::Authority;
231/// let authority: Authority = "HELLO.com".parse().unwrap();
232/// assert_eq!(authority, "hello.coM");
233/// assert_eq!("hello.com", authority);
234/// ```
235impl PartialEq<str> for Authority {
236    fn eq(&self, other: &str) -> bool {
237        self.data.eq_ignore_ascii_case(other)
238    }
239}
240
241impl PartialEq<Authority> for str {
242    fn eq(&self, other: &Authority) -> bool {
243        self.eq_ignore_ascii_case(other.as_str())
244    }
245}
246
247impl<'a> PartialEq<Authority> for &'a str {
248    fn eq(&self, other: &Authority) -> bool {
249        self.eq_ignore_ascii_case(other.as_str())
250    }
251}
252
253impl<'a> PartialEq<&'a str> for Authority {
254    fn eq(&self, other: &&'a str) -> bool {
255        self.data.eq_ignore_ascii_case(other)
256    }
257}
258
259impl PartialEq<String> for Authority {
260    fn eq(&self, other: &String) -> bool {
261        self.data.eq_ignore_ascii_case(other.as_str())
262    }
263}
264
265impl PartialEq<Authority> for String {
266    fn eq(&self, other: &Authority) -> bool {
267        self.as_str().eq_ignore_ascii_case(other.as_str())
268    }
269}
270
271/// Case-insensitive ordering
272///
273/// # Examples
274///
275/// ```
276/// # use http::uri::Authority;
277/// let authority: Authority = "DEF.com".parse().unwrap();
278/// assert!(authority < "ghi.com");
279/// assert!(authority > "abc.com");
280/// ```
281impl PartialOrd for Authority {
282    fn partial_cmp(&self, other: &Authority) -> Option<cmp::Ordering> {
283        let left = self.data.as_bytes().iter().map(|b| b.to_ascii_lowercase());
284        let right = other.data.as_bytes().iter().map(|b| b.to_ascii_lowercase());
285        left.partial_cmp(right)
286    }
287}
288
289impl PartialOrd<str> for Authority {
290    fn partial_cmp(&self, other: &str) -> Option<cmp::Ordering> {
291        let left = self.data.as_bytes().iter().map(|b| b.to_ascii_lowercase());
292        let right = other.as_bytes().iter().map(|b| b.to_ascii_lowercase());
293        left.partial_cmp(right)
294    }
295}
296
297impl PartialOrd<Authority> for str {
298    fn partial_cmp(&self, other: &Authority) -> Option<cmp::Ordering> {
299        let left = self.as_bytes().iter().map(|b| b.to_ascii_lowercase());
300        let right = other.data.as_bytes().iter().map(|b| b.to_ascii_lowercase());
301        left.partial_cmp(right)
302    }
303}
304
305impl<'a> PartialOrd<Authority> for &'a str {
306    fn partial_cmp(&self, other: &Authority) -> Option<cmp::Ordering> {
307        let left = self.as_bytes().iter().map(|b| b.to_ascii_lowercase());
308        let right = other.data.as_bytes().iter().map(|b| b.to_ascii_lowercase());
309        left.partial_cmp(right)
310    }
311}
312
313impl<'a> PartialOrd<&'a str> for Authority {
314    fn partial_cmp(&self, other: &&'a str) -> Option<cmp::Ordering> {
315        let left = self.data.as_bytes().iter().map(|b| b.to_ascii_lowercase());
316        let right = other.as_bytes().iter().map(|b| b.to_ascii_lowercase());
317        left.partial_cmp(right)
318    }
319}
320
321impl PartialOrd<String> for Authority {
322    fn partial_cmp(&self, other: &String) -> Option<cmp::Ordering> {
323        let left = self.data.as_bytes().iter().map(|b| b.to_ascii_lowercase());
324        let right = other.as_bytes().iter().map(|b| b.to_ascii_lowercase());
325        left.partial_cmp(right)
326    }
327}
328
329impl PartialOrd<Authority> for String {
330    fn partial_cmp(&self, other: &Authority) -> Option<cmp::Ordering> {
331        let left = self.as_bytes().iter().map(|b| b.to_ascii_lowercase());
332        let right = other.data.as_bytes().iter().map(|b| b.to_ascii_lowercase());
333        left.partial_cmp(right)
334    }
335}
336
337/// Case-insensitive hashing
338///
339/// # Examples
340///
341/// ```
342/// # use http::uri::Authority;
343/// # use std::hash::{Hash, Hasher};
344/// # use std::collections::hash_map::DefaultHasher;
345///
346/// let a: Authority = "HELLO.com".parse().unwrap();
347/// let b: Authority = "hello.coM".parse().unwrap();
348///
349/// let mut s = DefaultHasher::new();
350/// a.hash(&mut s);
351/// let a = s.finish();
352///
353/// let mut s = DefaultHasher::new();
354/// b.hash(&mut s);
355/// let b = s.finish();
356///
357/// assert_eq!(a, b);
358/// ```
359impl Hash for Authority {
360    fn hash<H>(&self, state: &mut H)
361    where
362        H: Hasher,
363    {
364        self.data.len().hash(state);
365        for &b in self.data.as_bytes() {
366            state.write_u8(b.to_ascii_lowercase());
367        }
368    }
369}
370
371impl<'a> TryFrom<&'a [u8]> for Authority {
372    type Error = InvalidUri;
373    #[inline]
374    fn try_from(s: &'a [u8]) -> Result<Self, Self::Error> {
375        // parse first, and only turn into Bytes if valid
376
377        // Preconditon on create_authority: copy_from_slice() copies all of
378        // bytes from the [u8] parameter into a new Bytes
379        create_authority(s, Bytes::copy_from_slice)
380    }
381}
382
383impl<'a> TryFrom<&'a str> for Authority {
384    type Error = InvalidUri;
385    #[inline]
386    fn try_from(s: &'a str) -> Result<Self, Self::Error> {
387        TryFrom::try_from(s.as_bytes())
388    }
389}
390
391impl TryFrom<Vec<u8>> for Authority {
392    type Error = InvalidUri;
393
394    #[inline]
395    fn try_from(vec: Vec<u8>) -> Result<Self, Self::Error> {
396        Authority::from_shared(vec.into())
397    }
398}
399
400impl TryFrom<String> for Authority {
401    type Error = InvalidUri;
402
403    #[inline]
404    fn try_from(t: String) -> Result<Self, Self::Error> {
405        Authority::from_shared(t.into())
406    }
407}
408
409impl FromStr for Authority {
410    type Err = InvalidUri;
411
412    fn from_str(s: &str) -> Result<Self, InvalidUri> {
413        TryFrom::try_from(s)
414    }
415}
416
417impl fmt::Debug for Authority {
418    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
419        f.write_str(self.as_str())
420    }
421}
422
423impl fmt::Display for Authority {
424    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
425        f.write_str(self.as_str())
426    }
427}
428
/// Extracts the host subcomponent from the text of an authority.
///
/// Everything up to and including the last `@` (the userinfo) is skipped.
/// If the remainder starts with `[`, the bracketed literal including both
/// brackets is returned; otherwise the text before the first `:` is
/// returned.
///
/// An empty authority yields an empty host instead of panicking.
///
/// # Panics
///
/// Panics if the host part starts with `[` but has no closing `]`, which
/// authority validation is expected to rule out.
fn host(auth: &str) -> &str {
    let host_port = auth
        .rsplit('@')
        .next()
        .expect("split always has at least 1 item");

    // `starts_with` rather than indexing byte 0: `host_port` is empty when
    // the authority itself is empty.
    if host_port.starts_with('[') {
        let close = host_port
            .find(']')
            .expect("parsing should validate brackets");
        &host_port[..=close]
    } else {
        host_port
            .split(':')
            .next()
            .expect("split always has at least 1 item")
    }
}
448
449// Precondition: f converts all of the bytes in the passed in B into the
450// returned Bytes.
451fn create_authority<B, F>(b: B, f: F) -> Result<Authority, InvalidUri>
452where
453    B: AsRef<[u8]>,
454    F: FnOnce(B) -> Bytes,
455{
456    let s = b.as_ref();
457    let authority_end = Authority::parse_non_empty(s)?;
458
459    if authority_end != s.len() {
460        return Err(ErrorKind::InvalidUriChar.into());
461    }
462
463    let bytes = f(b);
464
465    Ok(Authority {
466        // Safety: the postcondition on parse_non_empty() and the check against
467        // s.len() ensure that b is valid UTF-8. The precondition on f ensures
468        // that this is carried through to bytes.
469        data: unsafe { ByteStr::from_utf8_unchecked(bytes) },
470    })
471}
472
473/// Shared validation logic for authority bytes.
474/// Returns the end position of valid authority bytes, or an error.
475const fn validate_authority_bytes(s: &[u8]) -> Result<usize, AuthorityError> {
476    if s.is_empty() {
477        return Err(AuthorityError::Empty);
478    }
479
480    let mut colon_cnt: u32 = 0;
481    let mut start_bracket = false;
482    let mut end_bracket = false;
483    let mut has_percent = false;
484    let mut end = s.len();
485    let mut at_sign_pos: usize = s.len();
486    const MAX_COLONS: u32 = 8; // e.g., [FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80
487
488    let mut i = 0;
489    // Among other things, this loop checks that every byte in s up to the
490    // first '/', '?', or '#' is a valid URI character (or in some contexts,
491    // a '%'). This means that each such byte is a valid single-byte UTF-8
492    // code point.
493    while i < s.len() {
494        let b = s[i];
495        let ch = URI_CHARS[b as usize];
496
497        if ch == b'/' || ch == b'?' || ch == b'#' {
498            end = i;
499            break;
500        }
501
502        if ch == 0 {
503            if b == b'%' {
504                // Per https://tools.ietf.org/html/rfc3986#section-3.2.1 and
505                // https://url.spec.whatwg.org/#authority-state
506                // the userinfo can have a percent-encoded username and password,
507                // so record that a `%` was found. If this turns out to be
508                // part of the userinfo, this flag will be cleared.
509                // Also per https://tools.ietf.org/html/rfc6874, percent-encoding can
510                // be used to indicate a zone identifier.
511                // If the flag hasn't been cleared at the end, that means this
512                // was part of the hostname (and not part of an IPv6 address), and
513                // will fail with an error.
514                has_percent = true;
515            } else {
516                return Err(AuthorityError::InvalidUriChar);
517            }
518        } else if ch == b':' {
519            if colon_cnt >= MAX_COLONS {
520                return Err(AuthorityError::TooManyColons);
521            }
522            colon_cnt += 1;
523        } else if ch == b'[' {
524            if has_percent || start_bracket {
525                // Something other than the userinfo has a `%`, so reject it.
526                return Err(AuthorityError::InvalidBracketUsage);
527            }
528            start_bracket = true;
529        } else if ch == b']' {
530            if !start_bracket || end_bracket {
531                return Err(AuthorityError::InvalidBracketUsage);
532            }
533            end_bracket = true;
534
535            // Those were part of an IPv6 hostname, so forget them...
536            colon_cnt = 0;
537            has_percent = false;
538        } else if ch == b'@' {
539            at_sign_pos = i;
540
541            // Those weren't a port colon, but part of the
542            // userinfo, so it needs to be forgotten.
543            colon_cnt = 0;
544            has_percent = false;
545        }
546
547        i += 1;
548    }
549
550    if start_bracket != end_bracket {
551        return Err(AuthorityError::MismatchedBrackets);
552    }
553
554    if colon_cnt > 1 {
555        // Things like 'localhost:8080:3030' are rejected.
556        return Err(AuthorityError::InvalidAuthority);
557    }
558
559    if end > 0 && at_sign_pos == end - 1 {
560        // If there's nothing after an `@`, this is bonkers.
561        return Err(AuthorityError::EmptyAfterAt);
562    }
563
564    if has_percent {
565        // Something after the userinfo has a `%`, so reject it.
566        return Err(AuthorityError::InvalidPercent);
567    }
568
569    Ok(end)
570}
571
#[cfg(test)]
mod tests {
    use super::*;

    // Parses `input` as a stand-alone authority, panicking if it is rejected.
    fn authority(input: &str) -> Authority {
        input.parse().unwrap()
    }

    // Returns the error kind reported when `parse_non_empty` rejects `input`.
    fn rejection(input: &[u8]) -> ErrorKind {
        Authority::parse_non_empty(input).unwrap_err().0
    }

    #[test]
    fn parse_empty_string_is_error() {
        assert_eq!(rejection(b""), ErrorKind::Empty);
    }

    #[test]
    fn equal_to_self_of_same_authority() {
        let lower = authority("example.com");
        let upper = authority("EXAMPLE.COM");
        assert_eq!(lower, upper);
        assert_eq!(upper, lower);
    }

    #[test]
    fn not_equal_to_self_of_different_authority() {
        let first = authority("example.com");
        let second = authority("test.com");
        assert_ne!(first, second);
        assert_ne!(second, first);
    }

    #[test]
    fn equates_with_a_str() {
        let parsed = authority("example.com");
        assert_eq!(&parsed, "EXAMPLE.com");
        assert_eq!("EXAMPLE.com", &parsed);
        assert_eq!(parsed, "EXAMPLE.com");
        assert_eq!("EXAMPLE.com", parsed);
    }

    #[test]
    fn from_static_equates_with_a_str() {
        let parsed = Authority::from_static("example.com");
        assert_eq!(parsed, "example.com");
    }

    #[test]
    fn not_equal_with_a_str_of_a_different_authority() {
        let parsed = authority("example.com");
        assert_ne!(&parsed, "test.com");
        assert_ne!("test.com", &parsed);
        assert_ne!(parsed, "test.com");
        assert_ne!("test.com", parsed);
    }

    #[test]
    fn equates_with_a_string() {
        let parsed = authority("example.com");
        assert_eq!(parsed, "EXAMPLE.com".to_string());
        assert_eq!("EXAMPLE.com".to_string(), parsed);
    }

    #[test]
    fn equates_with_a_string_of_a_different_authority() {
        let parsed = authority("example.com");
        assert_ne!(parsed, "test.com".to_string());
        assert_ne!("test.com".to_string(), parsed);
    }

    #[test]
    fn compares_to_self() {
        let smaller = authority("abc.com");
        let larger = authority("def.com");
        assert!(smaller < larger);
        assert!(larger > smaller);
    }

    #[test]
    fn compares_with_a_str() {
        let parsed = authority("def.com");

        // through a reference
        assert!(&parsed < "ghi.com");
        assert!("ghi.com" > &parsed);
        assert!(&parsed > "abc.com");
        assert!("abc.com" < &parsed);

        // by value
        assert!(parsed < "ghi.com");
        assert!("ghi.com" > parsed);
        assert!(parsed > "abc.com");
        assert!("abc.com" < parsed);
    }

    #[test]
    fn compares_with_a_string() {
        let parsed = authority("def.com");
        assert!(parsed < "ghi.com".to_string());
        assert!("ghi.com".to_string() > parsed);
        assert!(parsed > "abc.com".to_string());
        assert!("abc.com".to_string() < parsed);
    }

    #[test]
    fn allows_percent_in_userinfo() {
        let text = "a%2f:b%2f@example.com";
        let parsed = authority(text);
        assert_eq!(parsed, text);
    }

    #[test]
    fn rejects_percent_in_hostname() {
        assert_eq!(rejection(b"example%2f.com"), ErrorKind::InvalidAuthority);
        assert_eq!(
            rejection(b"a%2f:b%2f@example%2f.com"),
            ErrorKind::InvalidAuthority
        );
    }

    #[test]
    fn allows_percent_in_ipv6_address() {
        let text = "[fe80::1:2:3:4%25eth0]";
        let parsed = authority(text);
        assert_eq!(parsed, text);
    }

    #[test]
    fn reject_obviously_invalid_ipv6_address() {
        assert_eq!(
            rejection(b"[0:1:2:3:4:5:6:7:8:9:10:11:12:13:14]"),
            ErrorKind::InvalidAuthority
        );
    }

    #[test]
    fn rejects_percent_outside_ipv6_address() {
        assert_eq!(
            rejection(b"1234%20[fe80::1:2:3:4]"),
            ErrorKind::InvalidAuthority
        );
        assert_eq!(rejection(b"[fe80::1:2:3:4]%20"), ErrorKind::InvalidAuthority);
    }

    #[test]
    fn rejects_invalid_utf8() {
        let from_slice = Authority::try_from([0xc0u8].as_ref()).unwrap_err();
        assert_eq!(from_slice.0, ErrorKind::InvalidUriChar);

        let from_bytes =
            Authority::from_shared(Bytes::from_static([0xc0u8].as_ref())).unwrap_err();
        assert_eq!(from_bytes.0, ErrorKind::InvalidUriChar);
    }

    #[test]
    fn rejects_invalid_use_of_brackets() {
        assert_eq!(rejection(b"[]@["), ErrorKind::InvalidAuthority);

        // reject tie-fighter
        assert_eq!(rejection(b"]o["), ErrorKind::InvalidAuthority);
    }
}
723        assert_eq!(err.0, ErrorKind::InvalidAuthority);
724    }
725}