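//! String utilities for the `hypermail` crate (`string_utils.rs`): reply-prefix
//! stripping for subjects, URL linkification, e-mail address obfuscation and
//! small in-place string helpers.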

1use once_cell::sync::Lazy;
2use regex::Regex;
3
/// Upper bound, in bytes, on a URL that `conv_urls` will turn into a link.
///
/// Matches longer than this are left as plain text, and `conv_urls` skips
/// lines longer than ten times this value altogether. RFC 3986 does not
/// define a maximum URL length; browsers commonly handle about 2048
/// characters, and 4096 leaves headroom for data URIs and long query strings.
const MAX_URL_LENGTH: usize = 4 * 1024;
10
/// Upper bound, in bytes, on the part of a subject examined when stripping
/// reply prefixes for thread detection.
///
/// RFC 2822 limits a single header line to 998 characters, but folded
/// headers and non-conforming clients can produce longer subjects. Input
/// beyond this many bytes is ignored to keep the per-subject cost bounded in
/// the threading code, which the original author describes as an O(n²) loop.
const MAX_SUBJECT_THREAD_LENGTH: usize = 2 * 1024;
16
17static URL_RE: Lazy<Regex> =
18    Lazy::new(|| Regex::new(r#"(?i)((https?|ftp)://[^\s<>"']+|www\.[^\s<>"']+)"#).unwrap());
19
20static EMAIL_RE: Lazy<Regex> =
21    Lazy::new(|| Regex::new(r"([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})").unwrap());
22
23static UNRE_RE: Lazy<Regex> = Lazy::new(|| {
24    Regex::new(r"(?i)^(\s*(re|fwd?|aw|ang|sv|vs|odp|antw)\s*[\[:\]>#]*\s*)+")
25        .expect("UNRE_RE compile")
26});
27
28static ONEUNRE_RE: Lazy<Regex> = Lazy::new(|| {
29    Regex::new(r"(?i)^\s*(re|fwd?|aw|ang|sv|vs|odp|antw)\s*[\[:\]>#]*\s*")
30        .expect("ONEUNRE_RE compile")
31});
32
33static STRIPZONE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\s+\([^)]*\)\s*$").unwrap());
34
35static NUM_REF_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"&#(\d+);").unwrap());
36
/// Overwrites `dest` with a prefix of `src` that is at most `max` bytes long.
///
/// `dest` is cleared first, so its previous contents are discarded. If byte
/// offset `max` falls inside a multi-byte UTF-8 character, the cut moves back
/// to the start of that character; the copy may therefore be shorter than
/// `max` bytes but is always valid UTF-8.
pub fn strcpymax(dest: &mut String, src: &str, max: usize) {
    // Walk back from the requested limit to the nearest character boundary.
    // Offset 0 is always a boundary, so the loop terminates.
    let mut cut = src.len().min(max);
    while !src.is_char_boundary(cut) {
        cut -= 1;
    }

    dest.clear();
    dest.push_str(&src[..cut]);
}
43
/// Lowercases `s` in place.
///
/// Pure-ASCII strings are lowercased without reallocating; anything else goes
/// through the Unicode-aware `str::to_lowercase`. Both paths produce the same
/// result for ASCII input.
pub fn strtolower(s: &mut String) {
    if s.is_ascii() {
        s.make_ascii_lowercase();
    } else {
        *s = s.to_lowercase();
    }
}
48
/// Replaces every occurrence of the substring `from` in `s` with `to`,
/// updating `s` in place.
///
/// When `from` does not occur, `s` is left untouched. An empty `from` matches
/// at every character boundary, as with `str::replace`.
pub fn strreplace_in(s: &mut String, from: &str, to: &str) {
    if !s.contains(from) {
        return;
    }
    let replaced = s.replace(from, to);
    *s = replaced;
}
53
/// Replaces every occurrence of the character `from` in `s` with the string
/// `to`, updating `s` in place.
pub fn replacechar(s: &mut String, from: char, to: &str) {
    let mut rebuilt = String::with_capacity(s.len());
    for ch in s.chars() {
        if ch == from {
            rebuilt.push_str(to);
        } else {
            rebuilt.push(ch);
        }
    }
    *s = rebuilt;
}
58
59/// Strips reply prefixes from email subjects.
60///
61/// Removes internationalized reply/forward prefixes: Re:, Fwd:, AW: (German),
62/// SV: (Swedish), Odp: (Polish), Antw: (Dutch), etc.
63///
64/// # Security
65///
66/// For performance in O(n²) threading loops, extremely long subjects are
67/// truncated to MAX_SUBJECT_THREAD_LENGTH before regex processing.
68///
69/// # Examples
70///
71/// ```
72/// use hypermail::string_utils::unre;
73/// assert_eq!(unre("Re: Hello"), "Hello");
74/// assert_eq!(unre("RE: Re: Fwd: Hello"), "Hello");
75/// assert_eq!(unre("AW: Diskussion"), "Diskussion");
76/// ```
77pub fn unre(subject: &str) -> String {
78    // Security: Limit subject length to prevent ReDoS on pathological inputs
79    let truncated = if subject.len() > MAX_SUBJECT_THREAD_LENGTH {
80        &subject[..MAX_SUBJECT_THREAD_LENGTH]
81    } else {
82        subject
83    };
84
85    UNRE_RE.replace(truncated, "").trim().to_string()
86}
87
88/// Strips a single reply/forward prefix from a subject line.
89pub fn oneunre(subject: &str) -> String {
90    // Security: Same truncation as unre()
91    let truncated = if subject.len() > MAX_SUBJECT_THREAD_LENGTH {
92        &subject[..MAX_SUBJECT_THREAD_LENGTH]
93    } else {
94        subject
95    };
96
97    ONEUNRE_RE.replace(truncated, "").trim().to_string()
98}
99
100/// Attempts to parse a URL from the start of a string, writing it into `url`.
101pub fn parse_url(s: &str, url: &mut String) -> Option<usize> {
102    if let Some(m) = URL_RE.find(s) {
103        url.push_str(m.as_str());
104        Some(m.len())
105    } else {
106        None
107    }
108}
109
110/// Converts URLs in text to clickable HTML links.
111///
112/// Detects http://, https://, ftp:// URLs and www. patterns, converting
113/// them to `<a>` tags with rel="noopener noreferrer" for security.
114///
115/// # Security
116///
117/// To prevent ReDoS attacks, this function skips processing if the input
118/// exceeds reasonable length or contains extremely long potential URLs.
119///
120/// # Arguments
121///
122/// * `line` - Text that may contain URLs
123///
124/// # Returns
125///
126/// Text with URLs replaced by HTML `<a>` tags
127pub fn conv_urls(line: &str) -> String {
128    // Security: Skip URL processing on unreasonably large inputs
129    // This prevents ReDoS on malicious inputs with pathological patterns
130    if line.len() > MAX_URL_LENGTH * 10 {
131        return line.to_string();
132    }
133
134    URL_RE
135        .replace_all(line, |caps: &regex::Captures| {
136            let url = &caps[1];
137
138            // Security: Skip extremely long URLs to prevent memory issues
139            if url.len() > MAX_URL_LENGTH {
140                return url.to_string();
141            }
142
143            let href = if url.starts_with("www.") {
144                format!("https://{}", url)
145            } else {
146                url.to_string()
147            };
148            format!("<a href=\"{}\" rel=\"noopener noreferrer\">{}</a>", href, url)
149        })
150        .to_string()
151}
152
/// Obfuscates an email address using HTML numeric character references.
///
/// `@` and every ASCII letter or digit is rewritten as a decimal reference
/// (`&#NN;`); all other characters, including `.`, `-` and `_`, are copied
/// through unchanged.
pub fn obfuscate_email_address(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut encoded, ch| {
            if ch == '@' || ch.is_ascii_alphanumeric() {
                encoded.push_str("&#");
                encoded.push_str(&(ch as u32).to_string());
                encoded.push(';');
            } else {
                encoded.push(ch);
            }
            encoded
        })
}
171
172/// Reverses HTML numeric character reference obfuscation back to plain text.
173pub fn unobfuscate_email_address(s: &str) -> String {
174    NUM_REF_RE
175        .replace_all(s, |caps: &regex::Captures| {
176            let code: u32 = caps[1].parse().unwrap_or(0);
177            char::from_u32(code).map_or(String::new(), |c| c.to_string())
178        })
179        .to_string()
180}
181
182/// Applies spam protection to email addresses in a string.
183///
184/// Replaces `@` with the configured anti-spam string, or substitutes the domain.
185pub fn spamify(
186    s: &str,
187    antispam_at: &str,
188    antispamdomain: Option<&str>,
189    spamprotect: bool,
190    spamprotect_id: bool,
191) -> String {
192    if !spamprotect && !spamprotect_id {
193        return s.to_string();
194    }
195
196    if !EMAIL_RE.is_match(s) {
197        return s.to_string();
198    }
199
200    let result = EMAIL_RE.replace_all(s, |caps: &regex::Captures| {
201        let email = &caps[1];
202        if let Some(domain) = antispamdomain {
203            if let Some(at_pos) = email.find('@') {
204                let local = &email[..at_pos];
205                return format!("{}@{}", local, domain);
206            }
207        }
208        if spamprotect {
209            email.replace('@', antispam_at)
210        } else {
211            email.to_string()
212        }
213    });
214
215    result.to_string()
216}
217
/// Returns a copy of `s` in which every character that also occurs in
/// `chars` is replaced by an underscore.
pub fn convchars(s: &str, chars: &str) -> String {
    s.chars()
        .map(|ch| if chars.contains(ch) { '_' } else { ch })
        .collect()
}
230
231/// Strips trailing parenthetical timezone info from a date string.
232pub fn stripzone(s: &str) -> String {
233    STRIPZONE_RE.replace(s.trim(), "").to_string()
234}
235
/// Returns the trimmed value, or `None` when nothing meaningful is set.
///
/// A value counts as unset when, after trimming whitespace, it is empty or
/// equals "NONE" (compared ASCII case-insensitively).
pub fn getvalue(s: &str) -> Option<&str> {
    Some(s.trim()).filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("NONE"))
}
245
/// Picks between a configured value and its default.
///
/// Returns `default_val` when `val` is empty or equals "NONE" (compared ASCII
/// case-insensitively); otherwise returns `val`. `val` is not trimmed, and
/// `_key` is accepted but not used.
pub fn getconfvalue(_key: &str, val: &str, default_val: &str) -> String {
    let unset = val.is_empty() || val.eq_ignore_ascii_case("NONE");
    let chosen = if unset { default_val } else { val };
    chosen.to_owned()
}
254
/// Unit tests for the string helpers in this module.
///
/// Where the expected output is fully determined, tests assert the exact
/// string rather than a substring, so that malformed output cannot pass.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_unre() {
        assert_eq!(unre("Re: Hello"), "Hello");
        assert_eq!(unre("Re: Re: Hello"), "Hello");
        assert_eq!(unre("Fwd: Hello"), "Hello");
        assert_eq!(unre("Hello"), "Hello");
        // Mixed case, mixed prefixes and surrounding whitespace.
        assert_eq!(unre("RE: Re: Fwd: Hello"), "Hello");
        assert_eq!(unre("AW: Diskussion"), "Diskussion");
        assert_eq!(unre("  Re:  Hello  "), "Hello");
    }

    #[test]
    fn test_conv_urls() {
        let result = conv_urls("Visit https://example.com today");
        assert_eq!(
            result,
            "Visit <a href=\"https://example.com\" rel=\"noopener noreferrer\">https://example.com</a> today"
        );
    }

    #[test]
    fn test_conv_urls_www_gets_https_scheme() {
        let result = conv_urls("see www.example.com");
        assert_eq!(
            result,
            "see <a href=\"https://www.example.com\" rel=\"noopener noreferrer\">www.example.com</a>"
        );
    }

    #[test]
    fn test_conv_urls_plain_text_unchanged() {
        assert_eq!(conv_urls("no links here"), "no links here");
        assert_eq!(conv_urls(""), "");
    }

    #[test]
    fn test_obfuscate_email() {
        let ob = obfuscate_email_address("a@b.com");
        assert_eq!(ob, "&#97;&#64;&#98;.&#99;&#111;&#109;");
    }

    #[test]
    fn test_spamify() {
        let result = spamify("a@b.com", " at ", None, true, false);
        assert_eq!(result, "a at b.com");
    }

    #[test]
    fn test_spamify_with_domain() {
        let result = spamify("a@b.com", "@", Some("example.com"), true, false);
        assert_eq!(result, "a@example.com");
    }

    #[test]
    fn test_stripzone() {
        let result = stripzone("Mon, 15 Mar 2021 12:00:00 +0000 (UTC)");
        assert_eq!(result, "Mon, 15 Mar 2021 12:00:00 +0000");
    }

    #[test]
    fn test_stripzone_without_zone_only_trims() {
        assert_eq!(stripzone("  Mon, 15 Mar 2021 12:00:00 +0000  "), "Mon, 15 Mar 2021 12:00:00 +0000");
    }

    #[test]
    fn test_getvalue() {
        assert_eq!(getvalue("test"), Some("test"));
        assert_eq!(getvalue("NONE"), None);
        assert_eq!(getvalue(""), None);
        // Trimming and case-insensitive "NONE".
        assert_eq!(getvalue("  test  "), Some("test"));
        assert_eq!(getvalue(" none "), None);
    }

    #[test]
    fn test_spamify_antispamdomain_replaces_domain() {
        let result = spamify("user@real-domain.com", "_at_", Some("nospam.invalid"), true, false);
        assert!(result.contains("nospam.invalid"), "domain should be replaced");
        assert!(!result.contains("real-domain.com"), "original domain should be gone");
    }

    #[test]
    fn test_spamify_antispamdomain_none_falls_back_to_at_replacement() {
        let result = spamify("user@real-domain.com", "_at_", None, true, false);
        assert_eq!(result, "user_at_real-domain.com");
    }

    #[test]
    fn test_convchars() {
        assert_eq!(convchars("hello world", " "), "hello_world");
        assert_eq!(convchars("a/b:c", "/:"), "a_b_c");
        assert_eq!(convchars("abc", ""), "abc");
    }

    #[test]
    fn test_strcpymax_shorter_than_max() {
        let mut dest = String::new();
        strcpymax(&mut dest, "hello", 10);
        assert_eq!(dest, "hello");
    }

    #[test]
    fn test_strcpymax_truncates_at_char_boundary() {
        let mut dest = String::new();
        let s = "héllo"; // h = 1 byte, é = 2 bytes
        strcpymax(&mut dest, s, 3); // offset 3 is a boundary → "hé"
        assert_eq!(dest, "hé");
        strcpymax(&mut dest, s, 2); // offset 2 is inside 'é' → backs up to "h"
        assert_eq!(dest, "h");
    }

    #[test]
    fn test_strcpymax_overwrites_previous_contents() {
        let mut dest = String::from("previous");
        strcpymax(&mut dest, "abc", 0);
        assert_eq!(dest, "");
    }

    #[test]
    fn test_strtolower() {
        let mut s = "HELLO World".to_string();
        strtolower(&mut s);
        assert_eq!(s, "hello world");
    }

    #[test]
    fn test_strreplace_in() {
        let mut s = "foo bar foo".to_string();
        strreplace_in(&mut s, "foo", "baz");
        assert_eq!(s, "baz bar baz");
    }

    #[test]
    fn test_replacechar() {
        let mut s = "a.b.c".to_string();
        replacechar(&mut s, '.', "_dot_");
        assert_eq!(s, "a_dot_b_dot_c");
    }

    #[test]
    fn test_oneunre_strips_single_prefix() {
        assert_eq!(oneunre("Re: Hello"), "Hello");
        assert_eq!(oneunre("Re: Re: Hello"), "Re: Hello");
        assert_eq!(oneunre("Hello"), "Hello");
    }

    #[test]
    fn test_parse_url_found() {
        let mut url = String::new();
        let len = parse_url("https://example.com/path?q=1", &mut url);
        assert_eq!(url, "https://example.com/path?q=1");
        // The returned length is the byte length of the matched URL.
        assert_eq!(len, Some(url.len()));
    }

    #[test]
    fn test_parse_url_not_found() {
        let mut url = String::new();
        let len = parse_url("plain text no url", &mut url);
        assert!(len.is_none());
        assert!(url.is_empty());
    }

    #[test]
    fn test_unobfuscate_roundtrip() {
        let original = "user@example.com";
        let obfuscated = obfuscate_email_address(original);
        let restored = unobfuscate_email_address(&obfuscated);
        assert_eq!(restored, original);
    }

    #[test]
    fn test_unobfuscate_leaves_plain_text_alone() {
        assert_eq!(unobfuscate_email_address("no references"), "no references");
    }

    #[test]
    fn test_getconfvalue_returns_val() {
        assert_eq!(getconfvalue("key", "value", "default"), "value");
    }

    #[test]
    fn test_getconfvalue_returns_default_on_empty() {
        assert_eq!(getconfvalue("key", "", "default"), "default");
    }

    #[test]
    fn test_getconfvalue_returns_default_on_none() {
        assert_eq!(getconfvalue("key", "NONE", "default"), "default");
        assert_eq!(getconfvalue("key", "none", "default"), "default");
    }

    #[test]
    fn test_spamify_no_email_unchanged() {
        let result = spamify("no email here", " at ", None, true, false);
        assert_eq!(result, "no email here");
    }

    #[test]
    fn test_spamify_disabled_unchanged() {
        let result = spamify("user@example.com", " at ", None, false, false);
        assert_eq!(result, "user@example.com");
    }
}
418}