http/uri/
path.rs

1use std::convert::TryFrom;
2use std::str::FromStr;
3use std::{cmp, fmt, hash, str};
4
5use bytes::Bytes;
6
7use super::{ErrorKind, InvalidUri};
8use crate::byte_str::ByteStr;
9
/// Reason a byte string was rejected by `validate_path_and_query_bytes`.
///
/// The type is private to this module; it only carries the rejection reason
/// from the `const` validator back to its caller.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PathAndQueryError {
    /// A byte before the first `?` is not in the set accepted for paths.
    InvalidPathChar,
    /// A byte after the first `?` is not in the set accepted for queries.
    InvalidQueryChar,
    /// A `#` was found; the validator rejects fragments instead of
    /// stripping them.
    FragmentNotAllowed,
}
17
/// Represents the path component of a URI together with its optional query
/// string.
///
/// Both parts live in a single buffer; `query` records where the path ends
/// and the query begins.
#[derive(Clone)]
pub struct PathAndQuery {
    /// The validated text: the path, optionally followed by `?` and the
    /// query. May be empty, in which case the accessors report `/`.
    pub(super) data: ByteStr,
    /// Byte offset of the `?` separator inside `data`, or `NONE` when there
    /// is no query.
    pub(super) query: u16,
}
24
/// Sentinel stored in `PathAndQuery::query` when the value has no `?`
/// separator. A real separator offset must therefore never equal this value.
const NONE: u16 = u16::MAX;
26
27impl PathAndQuery {
28    // Not public while `bytes` is unstable.
29    pub(super) fn from_shared(mut src: Bytes) -> Result<Self, InvalidUri> {
30        let mut query = NONE;
31        let mut fragment = None;
32
33        let mut is_maybe_not_utf8 = false;
34
35        // block for iterator borrow
36        {
37            let mut iter = src.as_ref().iter().enumerate();
38
39            // path ...
40            for (i, &b) in &mut iter {
41                // See https://url.spec.whatwg.org/#path-state
42                match b {
43                    b'?' => {
44                        debug_assert_eq!(query, NONE);
45                        query = i as u16;
46                        break;
47                    }
48                    b'#' => {
49                        fragment = Some(i);
50                        break;
51                    }
52
53                    // This is the range of bytes that don't need to be
54                    // percent-encoded in the path. If it should have been
55                    // percent-encoded, then error.
56                    #[rustfmt::skip]
57                    0x21 |
58                    0x24..=0x3B |
59                    0x3D |
60                    0x40..=0x5F |
61                    0x61..=0x7A |
62                    0x7C |
63                    0x7E => {}
64
65                    // potentially utf8, might not, should check
66                    0x7F..=0xFF => {
67                        is_maybe_not_utf8 = true;
68                    }
69
70                    // These are code points that are supposed to be
71                    // percent-encoded in the path but there are clients
72                    // out there sending them as is and httparse accepts
73                    // to parse those requests, so they are allowed here
74                    // for parity.
75                    //
76                    // For reference, those are code points that are used
77                    // to send requests with JSON directly embedded in
78                    // the URI path. Yes, those things happen for real.
79                    #[rustfmt::skip]
80                    b'"' |
81                    b'{' | b'}' => {}
82
83                    _ => return Err(ErrorKind::InvalidUriChar.into()),
84                }
85            }
86
87            // query ...
88            if query != NONE {
89                for (i, &b) in iter {
90                    match b {
91                        // While queries *should* be percent-encoded, most
92                        // bytes are actually allowed...
93                        // See https://url.spec.whatwg.org/#query-state
94                        //
95                        // Allowed: 0x21 / 0x24 - 0x3B / 0x3D / 0x3F - 0x7E
96                        #[rustfmt::skip]
97                        0x21 |
98                        0x24..=0x3B |
99                        0x3D |
100                        0x3F..=0x7E => {}
101
102                        0x7F..=0xFF => {
103                            is_maybe_not_utf8 = true;
104                        }
105
106                        b'#' => {
107                            fragment = Some(i);
108                            break;
109                        }
110
111                        _ => return Err(ErrorKind::InvalidUriChar.into()),
112                    }
113                }
114            }
115        }
116
117        if let Some(i) = fragment {
118            src.truncate(i);
119        }
120
121        let data = if is_maybe_not_utf8 {
122            ByteStr::from_utf8(src).map_err(|_| ErrorKind::InvalidUriChar)?
123        } else {
124            unsafe { ByteStr::from_utf8_unchecked(src) }
125        };
126
127        Ok(PathAndQuery { data, query })
128    }
129
130    /// Convert a `PathAndQuery` from a static string.
131    ///
132    /// This function will not perform any copying, however the string is
133    /// checked to ensure that it is valid.
134    ///
135    /// # Panics
136    ///
137    /// This function panics if the argument is an invalid path and query.
138    ///
139    /// # Examples
140    ///
141    /// ```
142    /// # use http::uri::*;
143    /// let v = PathAndQuery::from_static("/hello?world");
144    ///
145    /// assert_eq!(v.path(), "/hello");
146    /// assert_eq!(v.query(), Some("world"));
147    /// ```
148    #[inline]
149    pub const fn from_static(src: &'static str) -> Self {
150        match validate_path_and_query_bytes(src.as_bytes()) {
151            Ok(query) => PathAndQuery {
152                data: ByteStr::from_static(src),
153                query,
154            },
155            Err(_) => panic!("static str is not valid path"),
156        }
157    }
158
159    /// Attempt to convert a `Bytes` buffer to a `PathAndQuery`.
160    ///
161    /// This will try to prevent a copy if the type passed is the type used
162    /// internally, and will copy the data if it is not.
163    pub fn from_maybe_shared<T>(src: T) -> Result<Self, InvalidUri>
164    where
165        T: AsRef<[u8]> + 'static,
166    {
167        if_downcast_into!(T, Bytes, src, {
168            return PathAndQuery::from_shared(src);
169        });
170
171        PathAndQuery::try_from(src.as_ref())
172    }
173
174    pub(super) fn empty() -> Self {
175        PathAndQuery {
176            data: ByteStr::new(),
177            query: NONE,
178        }
179    }
180
181    pub(super) fn slash() -> Self {
182        PathAndQuery {
183            data: ByteStr::from_static("/"),
184            query: NONE,
185        }
186    }
187
188    pub(super) fn star() -> Self {
189        PathAndQuery {
190            data: ByteStr::from_static("*"),
191            query: NONE,
192        }
193    }
194
195    /// Returns the path component
196    ///
197    /// The path component is **case sensitive**.
198    ///
199    /// ```notrust
200    /// abc://username:password@example.com:123/path/data?key=value&key2=value2#fragid1
201    ///                                        |--------|
202    ///                                             |
203    ///                                           path
204    /// ```
205    ///
206    /// If the URI is `*` then the path component is equal to `*`.
207    ///
208    /// # Examples
209    ///
210    /// ```
211    /// # use http::uri::*;
212    ///
213    /// let path_and_query: PathAndQuery = "/hello/world".parse().unwrap();
214    ///
215    /// assert_eq!(path_and_query.path(), "/hello/world");
216    /// ```
217    #[inline]
218    pub fn path(&self) -> &str {
219        let ret = if self.query == NONE {
220            &self.data[..]
221        } else {
222            &self.data[..self.query as usize]
223        };
224
225        if ret.is_empty() {
226            return "/";
227        }
228
229        ret
230    }
231
232    /// Returns the query string component
233    ///
234    /// The query component contains non-hierarchical data that, along with data
235    /// in the path component, serves to identify a resource within the scope of
236    /// the URI's scheme and naming authority (if any). The query component is
237    /// indicated by the first question mark ("?") character and terminated by a
238    /// number sign ("#") character or by the end of the URI.
239    ///
240    /// ```notrust
241    /// abc://username:password@example.com:123/path/data?key=value&key2=value2#fragid1
242    ///                                                   |-------------------|
243    ///                                                             |
244    ///                                                           query
245    /// ```
246    ///
247    /// # Examples
248    ///
249    /// With a query string component
250    ///
251    /// ```
252    /// # use http::uri::*;
253    /// let path_and_query: PathAndQuery = "/hello/world?key=value&foo=bar".parse().unwrap();
254    ///
255    /// assert_eq!(path_and_query.query(), Some("key=value&foo=bar"));
256    /// ```
257    ///
258    /// Without a query string component
259    ///
260    /// ```
261    /// # use http::uri::*;
262    /// let path_and_query: PathAndQuery = "/hello/world".parse().unwrap();
263    ///
264    /// assert!(path_and_query.query().is_none());
265    /// ```
266    #[inline]
267    pub fn query(&self) -> Option<&str> {
268        if self.query == NONE {
269            None
270        } else {
271            let i = self.query + 1;
272            Some(&self.data[i as usize..])
273        }
274    }
275
276    /// Returns the path and query as a string component.
277    ///
278    /// # Examples
279    ///
280    /// With a query string component
281    ///
282    /// ```
283    /// # use http::uri::*;
284    /// let path_and_query: PathAndQuery = "/hello/world?key=value&foo=bar".parse().unwrap();
285    ///
286    /// assert_eq!(path_and_query.as_str(), "/hello/world?key=value&foo=bar");
287    /// ```
288    ///
289    /// Without a query string component
290    ///
291    /// ```
292    /// # use http::uri::*;
293    /// let path_and_query: PathAndQuery = "/hello/world".parse().unwrap();
294    ///
295    /// assert_eq!(path_and_query.as_str(), "/hello/world");
296    /// ```
297    #[inline]
298    pub fn as_str(&self) -> &str {
299        let ret = &self.data[..];
300        if ret.is_empty() {
301            return "/";
302        }
303        ret
304    }
305}
306
307impl<'a> TryFrom<&'a [u8]> for PathAndQuery {
308    type Error = InvalidUri;
309    #[inline]
310    fn try_from(s: &'a [u8]) -> Result<Self, Self::Error> {
311        PathAndQuery::from_shared(Bytes::copy_from_slice(s))
312    }
313}
314
315impl<'a> TryFrom<&'a str> for PathAndQuery {
316    type Error = InvalidUri;
317    #[inline]
318    fn try_from(s: &'a str) -> Result<Self, Self::Error> {
319        TryFrom::try_from(s.as_bytes())
320    }
321}
322
323impl TryFrom<Vec<u8>> for PathAndQuery {
324    type Error = InvalidUri;
325    #[inline]
326    fn try_from(vec: Vec<u8>) -> Result<Self, Self::Error> {
327        PathAndQuery::from_shared(vec.into())
328    }
329}
330
331impl TryFrom<String> for PathAndQuery {
332    type Error = InvalidUri;
333    #[inline]
334    fn try_from(s: String) -> Result<Self, Self::Error> {
335        PathAndQuery::from_shared(s.into())
336    }
337}
338
339impl TryFrom<&String> for PathAndQuery {
340    type Error = InvalidUri;
341    #[inline]
342    fn try_from(s: &String) -> Result<Self, Self::Error> {
343        TryFrom::try_from(s.as_bytes())
344    }
345}
346
347impl FromStr for PathAndQuery {
348    type Err = InvalidUri;
349    #[inline]
350    fn from_str(s: &str) -> Result<Self, InvalidUri> {
351        TryFrom::try_from(s)
352    }
353}
354
355impl fmt::Debug for PathAndQuery {
356    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
357        fmt::Display::fmt(self, f)
358    }
359}
360
361impl fmt::Display for PathAndQuery {
362    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
363        if !self.data.is_empty() {
364            match self.data.as_bytes()[0] {
365                b'/' | b'*' => write!(fmt, "{}", &self.data[..]),
366                _ => write!(fmt, "/{}", &self.data[..]),
367            }
368        } else {
369            write!(fmt, "/")
370        }
371    }
372}
373
374impl hash::Hash for PathAndQuery {
375    fn hash<H: hash::Hasher>(&self, state: &mut H) {
376        self.data.hash(state);
377    }
378}
379
380// ===== PartialEq / PartialOrd =====
381
382impl PartialEq for PathAndQuery {
383    #[inline]
384    fn eq(&self, other: &PathAndQuery) -> bool {
385        self.data == other.data
386    }
387}
388
389impl Eq for PathAndQuery {}
390
391impl PartialEq<str> for PathAndQuery {
392    #[inline]
393    fn eq(&self, other: &str) -> bool {
394        self.as_str() == other
395    }
396}
397
398impl<'a> PartialEq<PathAndQuery> for &'a str {
399    #[inline]
400    fn eq(&self, other: &PathAndQuery) -> bool {
401        self == &other.as_str()
402    }
403}
404
405impl<'a> PartialEq<&'a str> for PathAndQuery {
406    #[inline]
407    fn eq(&self, other: &&'a str) -> bool {
408        self.as_str() == *other
409    }
410}
411
412impl PartialEq<PathAndQuery> for str {
413    #[inline]
414    fn eq(&self, other: &PathAndQuery) -> bool {
415        self == other.as_str()
416    }
417}
418
419impl PartialEq<String> for PathAndQuery {
420    #[inline]
421    fn eq(&self, other: &String) -> bool {
422        self.as_str() == other.as_str()
423    }
424}
425
426impl PartialEq<PathAndQuery> for String {
427    #[inline]
428    fn eq(&self, other: &PathAndQuery) -> bool {
429        self.as_str() == other.as_str()
430    }
431}
432
433impl PartialOrd for PathAndQuery {
434    #[inline]
435    fn partial_cmp(&self, other: &PathAndQuery) -> Option<cmp::Ordering> {
436        self.as_str().partial_cmp(other.as_str())
437    }
438}
439
440impl PartialOrd<str> for PathAndQuery {
441    #[inline]
442    fn partial_cmp(&self, other: &str) -> Option<cmp::Ordering> {
443        self.as_str().partial_cmp(other)
444    }
445}
446
447impl PartialOrd<PathAndQuery> for str {
448    #[inline]
449    fn partial_cmp(&self, other: &PathAndQuery) -> Option<cmp::Ordering> {
450        self.partial_cmp(other.as_str())
451    }
452}
453
454impl<'a> PartialOrd<&'a str> for PathAndQuery {
455    #[inline]
456    fn partial_cmp(&self, other: &&'a str) -> Option<cmp::Ordering> {
457        self.as_str().partial_cmp(*other)
458    }
459}
460
461impl<'a> PartialOrd<PathAndQuery> for &'a str {
462    #[inline]
463    fn partial_cmp(&self, other: &PathAndQuery) -> Option<cmp::Ordering> {
464        self.partial_cmp(&other.as_str())
465    }
466}
467
468impl PartialOrd<String> for PathAndQuery {
469    #[inline]
470    fn partial_cmp(&self, other: &String) -> Option<cmp::Ordering> {
471        self.as_str().partial_cmp(other.as_str())
472    }
473}
474
475impl PartialOrd<PathAndQuery> for String {
476    #[inline]
477    fn partial_cmp(&self, other: &PathAndQuery) -> Option<cmp::Ordering> {
478        self.as_str().partial_cmp(other.as_str())
479    }
480}
481
482/// Shared validation logic for path and query bytes.
483/// Returns the query position (or NONE), or an error.
484const fn validate_path_and_query_bytes(bytes: &[u8]) -> Result<u16, PathAndQueryError> {
485    let mut query: u16 = NONE;
486    let mut i: usize = 0;
487
488    // path ...
489    while i < bytes.len() {
490        let b = bytes[i];
491        if b == b'?' {
492            query = i as u16;
493            i += 1;
494            break;
495        } else if b == b'#' {
496            return Err(PathAndQueryError::FragmentNotAllowed);
497        } else {
498            let allowed = b == 0x21
499                || (b >= 0x24 && b <= 0x3B)
500                || b == 0x3D
501                || (b >= 0x40 && b <= 0x5F)
502                || (b >= 0x61 && b <= 0x7A)
503                || b == 0x7C
504                || b == 0x7E
505                || b == b'"'
506                || b == b'{'
507                || b == b'}'
508                || (b >= 0x7F);
509
510            if !allowed {
511                return Err(PathAndQueryError::InvalidPathChar);
512            }
513        }
514        i += 1;
515    }
516
517    // query ...
518    if query != NONE {
519        while i < bytes.len() {
520            let b = bytes[i];
521            if b == b'#' {
522                return Err(PathAndQueryError::FragmentNotAllowed);
523            }
524
525            let allowed = b == 0x21
526                || (b >= 0x24 && b <= 0x3B)
527                || b == 0x3D
528                || (b >= 0x3F && b <= 0x7E)
529                || (b >= 0x7F);
530
531            if !allowed {
532                return Err(PathAndQueryError::InvalidQueryChar);
533            }
534
535            i += 1;
536        }
537    }
538
539    Ok(query)
540}
541
#[cfg(test)]
mod tests {
    use super::*;

    // ----- equality -----

    #[test]
    fn equal_to_self_of_same_path() {
        let p1: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        let p2: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        assert_eq!(p1, p2);
        assert_eq!(p2, p1);
    }

    #[test]
    fn not_equal_to_self_of_different_path() {
        let p1: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        let p2: PathAndQuery = "/world&foo=bar".parse().unwrap();
        assert_ne!(p1, p2);
        assert_ne!(p2, p1);
    }

    #[test]
    fn equates_with_a_str() {
        let path_and_query: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        assert_eq!(&path_and_query, "/hello/world&foo=bar");
        assert_eq!("/hello/world&foo=bar", &path_and_query);
        assert_eq!(path_and_query, "/hello/world&foo=bar");
        assert_eq!("/hello/world&foo=bar", path_and_query);
    }

    #[test]
    fn not_equal_with_a_str_of_a_different_path() {
        let path_and_query: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        // as a reference
        assert_ne!(&path_and_query, "/hello&foo=bar");
        assert_ne!("/hello&foo=bar", &path_and_query);
        // without reference
        assert_ne!(path_and_query, "/hello&foo=bar");
        assert_ne!("/hello&foo=bar", path_and_query);
    }

    #[test]
    fn equates_with_a_string() {
        let path_and_query: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        assert_eq!(path_and_query, "/hello/world&foo=bar".to_string());
        assert_eq!("/hello/world&foo=bar".to_string(), path_and_query);
    }

    #[test]
    fn not_equal_with_a_string_of_a_different_path() {
        let path_and_query: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
        assert_ne!(path_and_query, "/hello&foo=bar".to_string());
        assert_ne!("/hello&foo=bar".to_string(), path_and_query);
    }

    // ----- ordering -----

    #[test]
    fn compares_to_self() {
        let p1: PathAndQuery = "/a/world&foo=bar".parse().unwrap();
        let p2: PathAndQuery = "/b/world&foo=bar".parse().unwrap();
        assert!(p1 < p2);
        assert!(p2 > p1);
    }

    #[test]
    fn compares_with_a_str() {
        let path_and_query: PathAndQuery = "/b/world&foo=bar".parse().unwrap();
        // by ref
        assert!(&path_and_query < "/c/world&foo=bar");
        assert!("/c/world&foo=bar" > &path_and_query);
        assert!(&path_and_query > "/a/world&foo=bar");
        assert!("/a/world&foo=bar" < &path_and_query);

        // by val
        assert!(path_and_query < "/c/world&foo=bar");
        assert!("/c/world&foo=bar" > path_and_query);
        assert!(path_and_query > "/a/world&foo=bar");
        assert!("/a/world&foo=bar" < path_and_query);
    }

    #[test]
    fn compares_with_a_string() {
        let path_and_query: PathAndQuery = "/b/world&foo=bar".parse().unwrap();
        assert!(path_and_query < "/c/world&foo=bar".to_string());
        assert!("/c/world&foo=bar".to_string() > path_and_query);
        assert!(path_and_query > "/a/world&foo=bar".to_string());
        assert!("/a/world&foo=bar".to_string() < path_and_query);
    }

    // ----- parsing -----

    #[test]
    fn ignores_valid_percent_encodings() {
        assert_eq!("/a%20b", pq("/a%20b?r=1").path());
        assert_eq!("qr=%31", pq("/a/b?qr=%31").query().unwrap());
    }

    #[test]
    fn ignores_invalid_percent_encodings() {
        assert_eq!("/a%%b", pq("/a%%b?r=1").path());
        assert_eq!("/aaa%", pq("/aaa%").path());
        assert_eq!("/aaa%", pq("/aaa%?r=1").path());
        assert_eq!("/aa%2", pq("/aa%2").path());
        assert_eq!("/aa%2", pq("/aa%2?r=1").path());
        assert_eq!("qr=%3", pq("/a/b?qr=%3").query().unwrap());
    }

    #[test]
    fn allow_utf8_in_path() {
        assert_eq!("/🍕", pq("/🍕").path());
    }

    #[test]
    fn allow_utf8_in_query() {
        assert_eq!(Some("pizza=🍕"), pq("/test?pizza=🍕").query());
    }

    #[test]
    fn rejects_invalid_utf8_in_path() {
        PathAndQuery::try_from(&[b'/', 0xFF][..]).expect_err("reject invalid utf8");
    }

    #[test]
    fn rejects_invalid_utf8_in_query() {
        PathAndQuery::try_from(&[b'/', b'a', b'?', 0xFF][..]).expect_err("reject invalid utf8");
    }

    #[test]
    fn json_is_fine() {
        assert_eq!(
            r#"/{"bread":"baguette"}"#,
            pq(r#"/{"bread":"baguette"}"#).path()
        );
    }

    #[test]
    fn fragment_is_discarded() {
        let with_query = pq("/a?b#c");
        assert_eq!("/a", with_query.path());
        assert_eq!(Some("b"), with_query.query());
        assert_eq!("/a?b", with_query.as_str());

        let without_query = pq("/a#c");
        assert_eq!("/a", without_query.as_str());
        assert_eq!(None, without_query.query());
    }

    #[test]
    fn empty_input_reads_as_root() {
        let empty = pq("");
        assert_eq!("/", empty.path());
        assert_eq!("/", empty.as_str());
        assert_eq!(None, empty.query());
        assert_eq!("/", empty.to_string());
    }

    #[test]
    fn from_static_splits_path_and_query() {
        let v = PathAndQuery::from_static("/hello?world");
        assert_eq!("/hello", v.path());
        assert_eq!(Some("world"), v.query());
        assert_eq!(pq("/hello?world"), v);
    }

    #[test]
    fn display_starts_with_slash_or_star() {
        assert_eq!("/a", pq("a").to_string());
        assert_eq!("/a", pq("/a").to_string());
        assert_eq!("*", pq("*").to_string());
    }

    /// Parses `s`, panicking with the offending input on failure.
    ///
    /// The message is only formatted on the failure path.
    fn pq(s: &str) -> PathAndQuery {
        s.parse::<PathAndQuery>()
            .unwrap_or_else(|err| panic!("parsing {}: {:?}", s, err))
    }
}