ferron/util/
hostname_radix_tree.rs

1use std::{borrow::Cow, collections::BTreeMap};
2
3/// A multi-key for the hostname lookup
4#[derive(Clone, Debug, Eq, PartialEq)]
5struct HostnameRadixTreeMultiKey<'a>(Vec<Cow<'a, str>>);
6
7#[allow(clippy::non_canonical_partial_ord_impl)]
8impl<'a> PartialOrd for HostnameRadixTreeMultiKey<'a> {
9  fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
10    for i in 0..self.0.len().max(other.0.len()) {
11      let self_element = self.0.get(i);
12      let other_element = other.0.get(i);
13      match (self_element, other_element) {
14        (Some(a), Some(b)) => {
15          let cmp = a.cmp(b);
16          if cmp != std::cmp::Ordering::Equal {
17            return Some(cmp);
18          }
19        }
20        _ => return None,
21      }
22    }
23    Some(std::cmp::Ordering::Equal)
24  }
25}
26
27impl<'a> Ord for HostnameRadixTreeMultiKey<'a> {
28  fn cmp(&self, other: &Self) -> std::cmp::Ordering {
29    // The `partial_cmp` method returns `None` if the keys are of different lengths,
30    // but for the `Ord` trait we need to define a total order.
31    // In this case, we can consider keys of different lengths as equal for ordering purposes,
32    // since they will be handled separately in the tree structure.
33    //
34    // This is especially important in radix trees with BTreeMaps as children.
35    self.partial_cmp(other).unwrap_or(std::cmp::Ordering::Equal)
36  }
37}
38
39/// A node in the hostname radix tree
40#[derive(Debug)]
41struct HostnameRadixTreeNode<T> {
42  /// The exact-match configuration value associated with this node, if any
43  pub exact_value: Option<T>,
44  /// The wildcard configuration value associated with this node, if any
45  pub wildcard_value: Option<T>,
46  /// The child nodes of this node
47  pub children: BTreeMap<HostnameRadixTreeMultiKey<'static>, HostnameRadixTreeNode<T>>,
48}
49
50/// A radix tree for hostname lookups, supporting exact matches and wildcard matches
51#[derive(Debug)]
52pub struct HostnameRadixTree<T> {
53  root: HostnameRadixTreeNode<T>,
54}
55
56impl<T> HostnameRadixTree<T> {
57  /// Creates a new empty hostname radix tree
58  pub fn new() -> Self {
59    Self {
60      root: HostnameRadixTreeNode {
61        exact_value: None,
62        wildcard_value: None,
63        children: BTreeMap::new(),
64      },
65    }
66  }
67
68  /// Inserts a configuration value into the tree based on the provided key
69  pub fn insert(&mut self, matcher: String, value: T) {
70    let mut key: Vec<Cow<'static, str>> = Vec::new();
71    let mut is_wildcard = false;
72    for part in matcher.split('.').rev() {
73      if part.is_empty() {
74        continue;
75      }
76      if part == "*" {
77        is_wildcard = true;
78        continue;
79      }
80      if is_wildcard {
81        is_wildcard = false;
82        key.push(Cow::Owned("*".to_string()));
83      }
84      key.push(Cow::Owned(part.to_string()));
85    }
86
87    let mut current_node = &mut self.root;
88    let mut key_iter = key.into_iter();
89    let mut key_option = key_iter.next();
90    while let Some(key) = key_option.take() {
91      let mut multi_key = HostnameRadixTreeMultiKey(vec![key]);
92      match current_node.children.entry(multi_key) {
93        std::collections::btree_map::Entry::Occupied(mut entry) => {
94          let entry_key = entry.key();
95          for i in 1..=entry_key.0.len() {
96            if i == entry_key.0.len() {
97              // All keys match, continue with the existing multi-key
98
99              key_option = key_iter.next();
100              // Safety: after the current node is changed, we immediately break the inner loop and continue
101              // the outer loop. Without "unsafe", the borrow checker would not allow us to borrow the entry
102              // as mutable again in the next iteration of the outer loop.
103              current_node = unsafe {
104                std::mem::transmute::<&mut HostnameRadixTreeNode<T>, &mut HostnameRadixTreeNode<T>>(entry.get_mut())
105              };
106              break;
107            }
108            key_option = key_iter.next();
109            let mut break_multi_key = false;
110            if let Some(key) = &key_option {
111              if key != &entry_key.0[i] {
112                // Keys differ, break the multi-key and insert a new node
113                break_multi_key = true;
114              }
115            } else {
116              // No more keys, break the multi-key
117              break_multi_key = true;
118            }
119            if break_multi_key {
120              // Break the multi-key at index i and insert a new node for the remaining keys
121              let (mut entry_key, entry_value) = entry.remove_entry();
122              let entry_key_right = HostnameRadixTreeMultiKey(entry_key.0.split_off(i));
123              #[allow(clippy::mutable_key_type)]
124              let mut new_children = BTreeMap::new();
125              new_children.insert(entry_key_right, entry_value);
126              match current_node.children.entry(entry_key) {
127                std::collections::btree_map::Entry::Occupied(entry) => {
128                  current_node = entry.into_mut();
129                }
130                std::collections::btree_map::Entry::Vacant(entry) => {
131                  current_node = entry.insert(HostnameRadixTreeNode {
132                    exact_value: None,
133                    wildcard_value: None,
134                    children: new_children,
135                  });
136                }
137              }
138              break;
139            }
140          }
141        }
142        std::collections::btree_map::Entry::Vacant(entry) => {
143          multi_key = entry.into_key();
144
145          key_option = key_iter.next();
146          while let Some(key) = key_option.take() {
147            multi_key.0.push(key);
148            key_option = key_iter.next();
149          }
150
151          match current_node.children.entry(multi_key) {
152            std::collections::btree_map::Entry::Occupied(entry) => {
153              current_node = entry.into_mut();
154            }
155            std::collections::btree_map::Entry::Vacant(entry) => {
156              current_node = entry.insert(HostnameRadixTreeNode {
157                exact_value: None,
158                wildcard_value: None,
159                children: BTreeMap::new(),
160              });
161            }
162          };
163        }
164      }
165    }
166
167    if is_wildcard {
168      current_node.wildcard_value = Some(value);
169    } else {
170      current_node.exact_value = Some(value);
171    }
172  }
173
174  /// Obtains a reference to the configuration value associated with the provided multi-key, if it exists
175  pub fn get<'a>(&'a self, hostname: &'a str) -> Option<&'a T> {
176    let mut key: HostnameRadixTreeMultiKey<'a> = HostnameRadixTreeMultiKey(
177      hostname
178        .split('.')
179        .rev()
180        .filter_map(|s| if s.is_empty() { None } else { Some(Cow::Borrowed(s)) })
181        .collect(),
182    );
183
184    let mut current_node = &self.root;
185    let mut previous_value = None;
186    let mut value = current_node.wildcard_value.as_ref();
187    while !key.0.is_empty() {
188      if let Some((child_key, child)) = current_node.children.get_key_value(&key) {
189        // If a fixed multi-key matches, continue with the child node and the remaining key
190        if child_key.0.len() > key.0.len() {
191          // The child key is longer than the remaining key, so the fixed multi-key does not match
192          break;
193        }
194        current_node = child;
195        key.0 = key.0.split_off(child_key.0.len());
196        if current_node.wildcard_value.is_some() {
197          previous_value = value;
198          value = current_node.wildcard_value.as_ref();
199        }
200        continue;
201      }
202
203      // If nothing happened, probably no matching fixed multi-key found, so break the loop...
204      break;
205    }
206
207    if key.0.is_empty() {
208      // If the whole key is consumed, that means the whole hostname matches.
209      // In this case, set the value to the exact value, since it has higher priority than
210      // the wildcard value
211      value = previous_value;
212      if let Some(exact_value) = current_node.exact_value.as_ref() {
213        value = Some(exact_value);
214      }
215    }
216
217    value
218  }
219}
220
221#[cfg(test)]
222mod tests {
223  use super::*;
224  use std::borrow::Cow;
225
226  #[test]
227  fn test_hostname_radix_tree_multi_key_partial_ord() {
228    let a = HostnameRadixTreeMultiKey(vec![Cow::Borrowed("com"), Cow::Borrowed("example")]);
229    let b = HostnameRadixTreeMultiKey(vec![Cow::Borrowed("com"), Cow::Borrowed("example")]);
230    let c = HostnameRadixTreeMultiKey(vec![Cow::Borrowed("com"), Cow::Borrowed("test")]);
231    let d = HostnameRadixTreeMultiKey(vec![Cow::Borrowed("com")]);
232
233    assert_eq!(a.partial_cmp(&b), Some(std::cmp::Ordering::Equal));
234    assert_eq!(a.partial_cmp(&c), Some(std::cmp::Ordering::Less));
235    assert_eq!(c.partial_cmp(&a), Some(std::cmp::Ordering::Greater));
236    assert_eq!(a.partial_cmp(&d), None);
237  }
238
239  #[test]
240  fn test_hostname_radix_tree_multi_key_ord() {
241    let a = HostnameRadixTreeMultiKey(vec![Cow::Borrowed("com"), Cow::Borrowed("example")]);
242    let b = HostnameRadixTreeMultiKey(vec![Cow::Borrowed("com"), Cow::Borrowed("example")]);
243    let c = HostnameRadixTreeMultiKey(vec![Cow::Borrowed("com"), Cow::Borrowed("test")]);
244    let d = HostnameRadixTreeMultiKey(vec![Cow::Borrowed("com")]);
245
246    assert_eq!(a.cmp(&b), std::cmp::Ordering::Equal);
247    assert_eq!(a.cmp(&c), std::cmp::Ordering::Less);
248    assert_eq!(c.cmp(&a), std::cmp::Ordering::Greater);
249    assert_eq!(a.cmp(&d), std::cmp::Ordering::Equal);
250  }
251
252  #[test]
253  fn test_hostname_radix_tree_insert_and_get_exact() {
254    let mut tree = HostnameRadixTree::new();
255    tree.insert("example.com".to_string(), 42);
256    assert_eq!(tree.get("example.com"), Some(&42));
257    assert_eq!(tree.get("test.com"), None);
258  }
259
260  #[test]
261  fn test_hostname_radix_tree_insert_and_get_wildcard() {
262    let mut tree = HostnameRadixTree::new();
263    tree.insert("*.example.com".to_string(), 42);
264    assert_eq!(tree.get("test.example.com"), Some(&42));
265    assert_eq!(tree.get("example.com"), None);
266  }
267
268  #[test]
269  fn test_hostname_radix_tree_insert_and_get_overlap() {
270    let mut tree = HostnameRadixTree::new();
271    tree.insert("example.com".to_string(), 42);
272    tree.insert("*.example.com".to_string(), 100);
273    assert_eq!(tree.get("example.com"), Some(&42));
274    assert_eq!(tree.get("test.example.com"), Some(&100));
275  }
276
277  #[test]
278  fn test_hostname_radix_tree_insert_and_get_non_existent() {
279    let mut tree = HostnameRadixTree::new();
280    tree.insert("example.com".to_string(), 42);
281    assert_eq!(tree.get("nonexistent.com"), None);
282  }
283
284  #[test]
285  fn test_hostname_radix_tree_insert_and_get_empty_key() {
286    let mut tree = HostnameRadixTree::new();
287    tree.insert("".to_string(), 42);
288    assert_eq!(tree.get(""), Some(&42));
289  }
290}