hypermail/
mime.rs

1use crate::error::Result;
2
/// Parsed MIME `Content-Type` header with type, subtype, and parameters.
#[derive(Debug, Clone)]
pub struct ContentType {
    /// Top-level media type, lowercased (e.g. `text`, `multipart`).
    pub type_: String,
    /// Media subtype, lowercased; empty when the header value has no `/`.
    pub subtype: String,
    /// Header parameters keyed by lowercased name, with one pair of surrounding
    /// quotes removed from each value.
    pub params: std::collections::HashMap<String, String>,
}

impl ContentType {
    /// Parses a Content-Type header value into structured components.
    ///
    /// Everything before the first `;` is the media type, split on the first `/`.
    /// The remainder is split on `;` into `name=value` parameters; pieces without
    /// an `=` are ignored. Parsing never fails: malformed input simply yields
    /// empty or partial fields.
    pub fn parse(s: &str) -> Self {
        let s = s.trim();

        let (base, param_str) = match s.split_once(';') {
            Some((base, rest)) => (base.trim(), Some(rest)),
            None => (s, None),
        };

        let (type_, subtype) = match base.split_once('/') {
            Some((main, sub)) => (main.trim().to_lowercase(), sub.trim().to_lowercase()),
            None => (base.to_lowercase(), String::new()),
        };

        let mut params = std::collections::HashMap::new();
        for part in param_str.unwrap_or("").split(';') {
            if let Some((key, val)) = part.split_once('=') {
                let val = val.trim();
                // Strip one matching pair of quotes. Using strip_prefix/strip_suffix
                // (instead of slicing `1..len - 1`) keeps a lone quote character
                // from producing an inverted range and panicking.
                let unquoted = val
                    .strip_prefix('"')
                    .and_then(|inner| inner.strip_suffix('"'))
                    .or_else(|| val.strip_prefix('\'').and_then(|inner| inner.strip_suffix('\'')))
                    .unwrap_or(val);
                params.insert(key.trim().to_lowercase(), unquoted.to_string());
            }
        }

        ContentType { type_, subtype, params }
    }

    /// Returns `true` when the top-level type is `text`.
    pub fn is_text(&self) -> bool {
        self.type_ == "text"
    }

    /// Returns `true` when the top-level type is `multipart`.
    pub fn is_multipart(&self) -> bool {
        self.type_ == "multipart"
    }

    /// Returns the `boundary` parameter, if present.
    pub fn boundary(&self) -> Option<&str> {
        self.params.get("boundary").map(String::as_str)
    }

    /// Returns the `charset` parameter, if present.
    pub fn charset(&self) -> Option<&str> {
        self.params.get("charset").map(String::as_str)
    }

    /// Returns the `name` parameter, if present.
    pub fn name(&self) -> Option<&str> {
        self.params.get("name").map(String::as_str)
    }

    /// Returns the media type formatted as `type/subtype`.
    pub fn full_type(&self) -> String {
        format!("{}/{}", self.type_, self.subtype)
    }
}
72
/// Parsed MIME `Content-Disposition` header with disposition type and parameters.
#[derive(Debug, Clone)]
pub struct ContentDisposition {
    /// Disposition type, lowercased (e.g. `attachment`, `inline`).
    pub disposition: String,
    /// Header parameters keyed by lowercased name, with one pair of surrounding
    /// quotes removed from each value.
    pub params: std::collections::HashMap<String, String>,
}

impl ContentDisposition {
    /// Parses a Content-Disposition header value into structured components.
    ///
    /// Everything before the first `;` is the disposition type; the remainder is
    /// split on `;` into `name=value` parameters. Pieces without an `=` are
    /// ignored, so parsing never fails.
    pub fn parse(s: &str) -> Self {
        let s = s.trim();

        let (disposition, param_str) = match s.split_once(';') {
            Some((disp, rest)) => (disp.trim().to_lowercase(), Some(rest)),
            None => (s.to_lowercase(), None),
        };

        let mut params = std::collections::HashMap::new();
        for part in param_str.unwrap_or("").split(';') {
            if let Some((key, val)) = part.split_once('=') {
                let val = val.trim();
                // Strip one matching pair of quotes without slicing, so a value
                // consisting of a single quote character cannot cause a panic.
                let unquoted = val
                    .strip_prefix('"')
                    .and_then(|inner| inner.strip_suffix('"'))
                    .or_else(|| val.strip_prefix('\'').and_then(|inner| inner.strip_suffix('\'')))
                    .unwrap_or(val);
                params.insert(key.trim().to_lowercase(), unquoted.to_string());
            }
        }

        ContentDisposition { disposition, params }
    }

    /// Returns the `filename` parameter, if present.
    pub fn filename(&self) -> Option<&str> {
        self.params.get("filename").map(String::as_str)
    }

    /// Returns `true` when the disposition type is `attachment`.
    pub fn is_attachment(&self) -> bool {
        self.disposition == "attachment"
    }
}
119
/// Returns the value (0-15) of an ASCII hexadecimal digit, accepting both
/// upper- and lowercase letters, or `None` for any other byte.
fn hex_val(b: u8) -> Option<u8> {
    // A radix-16 digit is always below 16, so narrowing to u8 is lossless.
    char::from(b).to_digit(16).map(|digit| digit as u8)
}
128
129/// Decodes base64-encoded data, ignoring whitespace.
130pub fn decode_base64(data: &[u8]) -> Result<Vec<u8>> {
131    let text = std::str::from_utf8(data)
132        .map_err(|e| crate::error::HypermailError::Parse(format!("Invalid base64 text: {e}")))?;
133
134    let clean: String = text.chars().filter(|c| !c.is_whitespace()).collect();
135
136    use base64::Engine as _;
137    let engine = base64::engine::general_purpose::STANDARD;
138    engine
139        .decode(&clean)
140        .map_err(|e| crate::error::HypermailError::Parse(format!("Base64 decode error: {e}")))
141}
142
143/// Decodes quoted-printable encoded data, handling soft line breaks and `_` as space.
144pub fn decode_quoted_printable(data: &[u8]) -> Vec<u8> {
145    let mut result = Vec::with_capacity(data.len());
146    let mut i = 0;
147    while i < data.len() {
148        if data[i] == b'=' {
149            if i + 2 < data.len() && data[i + 1] == b'\r' && data[i + 2] == b'\n' {
150                i += 3;
151                continue;
152            }
153            if i + 2 < data.len() {
154                if let (Some(h), Some(l)) = (hex_val(data[i + 1]), hex_val(data[i + 2])) {
155                    result.push(h << 4 | l);
156                    i += 3;
157                    continue;
158                }
159            }
160        }
161        if data[i] == b'_' {
162            result.push(b' ');
163        } else if data[i] != b'\r' {
164            result.push(data[i]);
165        }
166        i += 1;
167    }
168    result
169}
170
/// Decodes uuencoded data, returning `None` if no valid uuencode block is found.
///
/// Only lines between a `begin ` line and an `end` (or lone `` ` ``) line are
/// decoded. The first character of each data line encodes how many bytes the
/// line carries; at most that many bytes are emitted, so the padding of the
/// final 4-character group never leaks into the output. Lines whose length
/// character is below the uuencode range (byte value < 32) are skipped.
///
/// Returns `None` when `data` is not valid UTF-8 or nothing was decoded.
pub fn decode_uuencode(data: &[u8]) -> Option<Vec<u8>> {
    let text = std::str::from_utf8(data).ok()?;
    let mut result = Vec::new();
    let mut in_encoded = false;

    for line in text.lines() {
        let line = line.trim_end();
        if line.starts_with("begin ") {
            in_encoded = true;
            continue;
        }
        if line == "end" || line == "`" {
            in_encoded = false;
            continue;
        }
        if !in_encoded {
            continue;
        }

        let Some((&len_byte, payload)) = line.as_bytes().split_first() else {
            continue;
        };
        // checked_sub avoids the integer underflow that a control character in
        // the length position would otherwise trigger.
        let Some(len_code) = len_byte.checked_sub(32) else {
            continue;
        };
        let mut remaining = usize::from(len_code & 0x3f);

        for group in payload.chunks(4) {
            if remaining == 0 {
                break;
            }

            // Map each character to its 6-bit value; missing characters stay zero
            // but are never emitted because `take` is bounded by the group length.
            let mut sextets = [0u8; 4];
            for (slot, &ch) in sextets.iter_mut().zip(group) {
                *slot = ch.wrapping_sub(32) & 0x3f;
            }
            let decoded = [
                (sextets[0] << 2) | (sextets[1] >> 4),
                (sextets[1] << 4) | (sextets[2] >> 2),
                (sextets[2] << 6) | sextets[3],
            ];

            // n characters carry n - 1 complete bytes; never exceed the declared count.
            let take = (group.len() - 1).min(remaining);
            result.extend_from_slice(&decoded[..take]);
            remaining -= take;
        }
    }

    if result.is_empty() {
        None
    } else {
        Some(result)
    }
}
236
/// Combined MIME content-type and transfer-encoding information for a message part.
#[derive(Debug, Clone)]
pub struct MimeInfo {
    /// Parsed `Content-Type` header of the part.
    pub content_type: ContentType,
    /// Value of the `Content-Transfer-Encoding` header, or `None` when the header
    /// is absent. `parse_mime_info` stores it trimmed and lowercased.
    pub content_transfer_encoding: Option<String>,
}
243
244/// Extracts MIME info (content-type and transfer-encoding) from parsed headers.
245pub fn parse_mime_info(headers: &[(String, String)]) -> Option<MimeInfo> {
246    let ct_str = headers
247        .iter()
248        .find(|(name, _)| name.eq_ignore_ascii_case("content-type"))
249        .map(|(_, val)| val.as_str())?;
250
251    let content_type = ContentType::parse(ct_str);
252    let cte = headers
253        .iter()
254        .find(|(name, _)| name.eq_ignore_ascii_case("content-transfer-encoding"))
255        .map(|(_, val)| val.trim().to_lowercase());
256
257    Some(MimeInfo { content_type, content_transfer_encoding: cte })
258}
259
/// Scans the parts of a multipart body and returns the first `charset=` value
/// found in a part's `Content-Type` header.
///
/// A boundary only counts when `--<boundary>` is directly followed by a line
/// break, so the closing `--<boundary>--` delimiter is skipped. Folded
/// `Content-Type` headers (continuation lines starting with a space or tab) are
/// searched as well. Returns `None` when no part declares a charset.
fn find_multipart_charset(body: &[u8], boundary: &str) -> Option<String> {
    const CONTENT_TYPE_PREFIX: &str = "content-type:";

    let boundary_tag = format!("--{}", boundary);
    let boundary_bytes = boundary_tag.as_bytes();
    let mut pos = 0;

    while pos < body.len() {
        // Find next boundary starting from pos
        let start =
            match body[pos..].windows(boundary_bytes.len()).position(|w| w == boundary_bytes) {
                Some(offset) => pos + offset,
                None => break, // No more boundaries found, exit loop
            };

        let after_boundary = &body[start + boundary_bytes.len()..];

        // Verify boundary is followed by newline (not just part of content)
        let after_eol = if after_boundary.starts_with(b"\r\n") {
            &after_boundary[2..]
        } else if after_boundary.starts_with(b"\n") {
            &after_boundary[1..]
        } else {
            // Not a valid boundary, continue searching
            pos = start + 1;
            continue;
        };

        // Find end of headers (empty line)
        let header_end = after_eol
            .windows(2)
            .position(|w| w == b"\n\n")
            .or_else(|| after_eol.windows(4).position(|w| w == b"\r\n\r\n").map(|p| p + 2));

        if let Some(header_end) = header_end {
            if let Ok(header_block) = std::str::from_utf8(&after_eol[..header_end]) {
                // True while the current line is the Content-Type header or one of
                // its folded continuation lines.
                let mut in_content_type = false;
                for line in header_block.lines() {
                    if !line.starts_with([' ', '\t']) {
                        in_content_type = line
                            .get(..CONTENT_TYPE_PREFIX.len())
                            .map_or(false, |head| head.eq_ignore_ascii_case(CONTENT_TYPE_PREFIX));
                    }
                    if !in_content_type {
                        continue;
                    }

                    // ASCII lowercasing never changes byte lengths, so offsets found
                    // in `lower` are valid in `line` too.
                    let lower = line.to_ascii_lowercase();
                    if let Some(charset_start) = lower.find("charset=") {
                        let after = line[charset_start + "charset=".len()..].trim_start();
                        // Cut the value off at the next parameter first and only then
                        // strip quotes; otherwise a trailing quote survives when more
                        // parameters follow.
                        let token =
                            after.split([';', ' ', '\t', '\r', '\n']).next().unwrap_or(after);
                        let charset = token.trim_matches('"').trim_matches('\'');
                        if !charset.is_empty() {
                            return Some(charset.to_string());
                        }
                    }
                }
            }
        }

        // Move past this boundary to search for next part
        pos = start + boundary_bytes.len();
    }
    None
}
317
318/// Decodes a MIME message body using its content-type and transfer-encoding.
319///
320/// Handles charset conversion, multipart boundaries, and format=flowed unwrapping.
321pub fn decode_body(body: &[u8], mime_info: &MimeInfo) -> String {
322    let decoded_bytes = match mime_info.content_transfer_encoding.as_deref() {
323        Some("base64") => match decode_base64(body) {
324            Ok(bytes) => bytes,
325            Err(_) => body.to_vec(),
326        },
327        Some("quoted-printable") | Some("qp") => decode_quoted_printable(body),
328        // 7bit, 8bit, binary → use raw bytes
329        _ => body.to_vec(),
330    };
331
332    let charset: Option<String> =
333        mime_info.content_type.charset().map(|s| s.to_string()).or_else(|| {
334            if mime_info.content_type.is_multipart() {
335                if let Some(boundary) = mime_info.content_type.boundary() {
336                    find_multipart_charset(body, boundary)
337                } else {
338                    None
339                }
340            } else {
341                None
342            }
343        });
344
345    // Use smart charset detection that handles mislabeled charsets
346    if let Some(ref charset) = charset {
347        return crate::headers::decode_to_utf8(&decoded_bytes, charset);
348    }
349
350    // No charset specified: try UTF-8 first, then common fallbacks
351    if let Ok(s) = std::str::from_utf8(&decoded_bytes) {
352        return s.to_string();
353    }
354
355    // Try common Greek/European charsets as fallback
356    for label in &["windows-1253", "iso-8859-7", "iso-8859-1", "windows-1252"] {
357        if let Some(encoding) = encoding_rs::Encoding::for_label(label.as_bytes()) {
358            let (cow, _, _) = encoding.decode(&decoded_bytes);
359            if !cow.contains('\u{FFFD}') {
360                return cow.into_owned();
361            }
362        }
363    }
364
365    String::from_utf8_lossy(&decoded_bytes).to_string()
366}
367
368/// QUAL-4: Variant of `decode_body` that skips internal charset resolution,
369/// using the already-resolved `charset` string instead.
370fn decode_body_with_charset(body: &[u8], mime_info: &MimeInfo, charset: Option<&str>) -> String {
371    let decoded_bytes = match mime_info.content_transfer_encoding.as_deref() {
372        Some("base64") => match decode_base64(body) {
373            Ok(bytes) => bytes,
374            Err(_) => body.to_vec(),
375        },
376        Some("quoted-printable") | Some("qp") => decode_quoted_printable(body),
377        _ => body.to_vec(),
378    };
379
380    if let Some(cs) = charset {
381        return crate::headers::decode_to_utf8(&decoded_bytes, cs);
382    }
383
384    if let Ok(s) = std::str::from_utf8(&decoded_bytes) {
385        return s.to_string();
386    }
387
388    for label in &["windows-1253", "iso-8859-7", "iso-8859-1", "windows-1252"] {
389        if let Some(encoding) = encoding_rs::Encoding::for_label(label.as_bytes()) {
390            let (cow, _, _) = encoding.decode(&decoded_bytes);
391            if !cow.contains('\u{FFFD}') {
392                return cow.into_owned();
393            }
394        }
395    }
396
397    String::from_utf8_lossy(&decoded_bytes).to_string()
398}
399
400fn resolve_charset(body_raw: &[u8], mi: &MimeInfo) -> Option<String> {
401    mi.content_type.charset().map(|s| s.to_string()).or_else(|| {
402        if mi.content_type.is_multipart() {
403            if let Some(boundary) = mi.content_type.boundary() {
404                find_multipart_charset(body_raw, boundary)
405            } else {
406                None
407            }
408        } else {
409            None
410        }
411    })
412}
413
414/// Processes a MIME message body, returning decoded text and detected charset.
415///
416/// Handles multipart messages, inline images, attachments, and charset detection.
417///
418/// # Security
419///
420/// Only safe image MIME types are embedded inline; SVG is excluded due to script risks.
421pub fn process_mime_body(
422    headers: &[(String, String)],
423    body_raw: &[u8],
424) -> (String, Option<String>) {
425    let mi = parse_mime_info(headers);
426    if let Some(ref mi) = mi {
427        // Check if this is a multipart message
428        if mi.content_type.is_multipart() {
429            if let Some(boundary) = mi.content_type.boundary() {
430                return process_multipart_body(body_raw, boundary, mi);
431            }
432        }
433
434        let charset = resolve_charset(body_raw, mi);
435        // QUAL-4: Use decode_body_with_charset to avoid resolving charset twice.
436        let mut decoded = decode_body_with_charset(body_raw, mi, charset.as_deref());
437        // RFC 3676: unwrap format=flowed text
438        if mi
439            .content_type
440            .params
441            .get("format")
442            .map(|v| v.eq_ignore_ascii_case("flowed"))
443            .unwrap_or(false)
444        {
445            decoded = unflow_text(&decoded);
446        }
447        (decoded, charset)
448    } else {
449        // No Content-Type header: try UTF-8 first, then fallback charsets
450        if let Ok(s) = std::str::from_utf8(body_raw) {
451            return (s.to_string(), None);
452        }
453        for label in &["windows-1253", "iso-8859-7", "iso-8859-1", "windows-1252"] {
454            if let Some(encoding) = encoding_rs::Encoding::for_label(label.as_bytes()) {
455                let (cow, _, _) = encoding.decode(body_raw);
456                if !cow.contains('\u{FFFD}') {
457                    return (cow.into_owned(), Some(label.to_string()));
458                }
459            }
460        }
461        (String::from_utf8_lossy(body_raw).to_string(), None)
462    }
463}
464
465fn process_multipart_body(
466    body: &[u8],
467    boundary: &str,
468    parent_mime: &MimeInfo,
469) -> (String, Option<String>) {
470    let is_alternative = parent_mime.content_type.subtype == "alternative";
471    let boundary_tag = format!("--{}", boundary);
472    let boundary_bytes = boundary_tag.as_bytes();
473    let mut result = String::new();
474    let mut detected_charset = None;
475    let mut pos = 0;
476
477    // For multipart/alternative: collect all text parts, then pick the best one.
478    // Prefer text/plain over text/html to avoid rendering raw HTML.
479    let mut alt_plain: Option<(String, Option<String>)> = None;
480    let mut alt_html: Option<(String, Option<String>)> = None;
481
482    while pos < body.len() {
483        // Find next boundary
484        let start =
485            match body[pos..].windows(boundary_bytes.len()).position(|w| w == boundary_bytes) {
486                Some(offset) => pos + offset,
487                None => break,
488            };
489
490        // Check for end boundary
491        let after_boundary = &body[start + boundary_bytes.len()..];
492        if after_boundary.starts_with(b"--") {
493            // End boundary found
494            break;
495        }
496
497        // Skip to content after boundary line
498        let after_eol = if after_boundary.starts_with(b"\r\n") {
499            &after_boundary[2..]
500        } else if after_boundary.starts_with(b"\n") {
501            &after_boundary[1..]
502        } else {
503            pos = start + boundary_bytes.len();
504            continue;
505        };
506
507        // Find end of part headers
508        let header_end = after_eol
509            .windows(2)
510            .position(|w| w == b"\n\n")
511            .or_else(|| after_eol.windows(4).position(|w| w == b"\r\n\r\n").map(|p| p + 2));
512
513        if let Some(header_end) = header_end {
514            let part_headers_bytes = &after_eol[..header_end];
515            let part_body_start = header_end + 2;
516
517            // Find next boundary to determine part body end
518            let part_body = if let Some(next_boundary_pos) = after_eol[part_body_start..]
519                .windows(boundary_bytes.len())
520                .position(|w| w == boundary_bytes)
521            {
522                &after_eol[part_body_start..part_body_start + next_boundary_pos]
523            } else {
524                &after_eol[part_body_start..]
525            };
526
527            // Parse part headers
528            if let Ok(headers_str) = std::str::from_utf8(part_headers_bytes) {
529                let mut part_headers = Vec::new();
530                for line in headers_str.lines() {
531                    if let Some((name, value)) = line.split_once(':') {
532                        part_headers.push((name.trim().to_lowercase(), value.trim().to_string()));
533                    }
534                }
535
536                // Check if this part is an attachment, inline content, or has Content-ID
537                let mut is_attachment = false;
538                let mut _has_content_id = false;
539                let mut content_type_main = String::new();
540                let mut encoding = String::new();
541
542                for (name, value) in &part_headers {
543                    if name == "content-disposition" {
544                        is_attachment = value.to_lowercase().starts_with("attachment");
545                    }
546                    if name == "content-id" {
547                        _has_content_id = true;
548                    }
549                    if name == "content-type" {
550                        if let Some(main_type) = value.split(';').next() {
551                            content_type_main = main_type.trim().to_lowercase();
552                        }
553                    }
554                    if name == "content-transfer-encoding" {
555                        encoding = value.trim().to_lowercase();
556                    }
557                }
558
559                // Allowlist of safe image MIME types for inline embedding.
560                // image/svg+xml is excluded — SVG can contain scripts.
561                const SAFE_IMAGE_TYPES: &[&str] = &[
562                    "image/gif",
563                    "image/jpeg",
564                    "image/jpg",
565                    "image/png",
566                    "image/webp",
567                    "image/bmp",
568                    "image/tiff",
569                ];
570
571                // Determine how to handle this part
572                if content_type_main.starts_with("image/")
573                    && SAFE_IMAGE_TYPES.contains(&content_type_main.as_str())
574                {
575                    // Always embed images inline for the HTML archive — viewers browse,
576                    // they don't download.  Content-Disposition: attachment is an email-client
577                    // hint that does not apply here.  Only fall back to a link if the raw
578                    // data is missing or decoding fails.
579                    let image_data = if encoding == "base64" {
580                        decode_base64(part_body.trim_ascii()).ok()
581                    } else if !part_body.is_empty() {
582                        Some(part_body.to_vec())
583                    } else {
584                        None
585                    };
586
587                    if let Some(data) = image_data {
588                        use base64::Engine as _;
589                        let engine = base64::engine::general_purpose::STANDARD;
590                        let base64_data = engine.encode(&data);
591                        if !result.is_empty() {
592                            result.push('\n');
593                        }
594                        result.push_str(&format!(
595                            "[INLINE_IMAGE:{}:{}]\n",
596                            content_type_main, base64_data
597                        ));
598                    } else if let Some(filename) = extract_filename(&part_headers) {
599                        // Decoding failed — fall back to a named attachment link
600                        if !result.is_empty() {
601                            result.push('\n');
602                        }
603                        result.push_str(&format!("[Attachment: {}]\n", filename));
604                    }
605                } else if content_type_main.starts_with("image/")
606                    || is_attachment
607                    || content_type_main.starts_with("application/")
608                {
609                    // Non-safe image or non-image attachment - just note it
610                    if let Some(filename) = extract_filename(&part_headers) {
611                        if !result.is_empty() {
612                            result.push('\n');
613                        }
614                        result.push_str(&format!("[Attachment: {}]\n", filename));
615                    }
616                } else if content_type_main.starts_with("text/") || content_type_main.is_empty() {
617                    // Process text content
618                    let (decoded, charset) = process_mime_body(&part_headers, part_body);
619                    if is_alternative {
620                        // LOG-2: For multipart/alternative, collect parts separately.
621                        if content_type_main == "text/plain" || content_type_main.is_empty() {
622                            if alt_plain.is_none() {
623                                alt_plain = Some((decoded, charset));
624                            }
625                        } else if content_type_main == "text/html" && alt_html.is_none() {
626                            alt_html = Some((decoded, charset));
627                        }
628                        // other text/* subtypes ignored for alternative
629                    } else {
630                        if detected_charset.is_none() && charset.is_some() {
631                            detected_charset = charset;
632                        }
633                        if !result.is_empty() && !decoded.is_empty() {
634                            result.push('\n');
635                        }
636                        result.push_str(&decoded);
637                    }
638                }
639            }
640        }
641
642        // Move to next part
643        pos = start + boundary_bytes.len();
644    }
645
646    // LOG-2: For multipart/alternative, select the single best part.
647    // Prefer text/plain; fall back to text/html if no plain part exists.
648    if is_alternative {
649        let chosen = alt_plain.or(alt_html);
650        if let Some((text, charset)) = chosen {
651            return (text, charset);
652        }
653        return (result, detected_charset);
654    }
655
656    (result, detected_charset)
657}
658
659fn extract_filename(headers: &[(String, String)]) -> Option<String> {
660    for (name, value) in headers {
661        if name == "content-disposition" || name == "content-type" {
662            // Try RFC 2231 continuation first (filename*0=, filename*1=, ...)
663            if let Some(f) = extract_rfc2231_filename(value) {
664                return Some(f);
665            }
666            // Try RFC 2231 charset encoding (filename*=charset'lang'value)
667            if let Some(f) = extract_rfc2231_encoded_filename(value) {
668                return Some(f);
669            }
670            // Fall back to simple filename= or name=
671            for param in value.split(';') {
672                let param = param.trim();
673                if let Some(filename_part) =
674                    param.strip_prefix("filename=").or_else(|| param.strip_prefix("name="))
675                {
676                    let filename = filename_part.trim().trim_matches('"').trim_matches('\'');
677                    if !filename.is_empty() {
678                        return Some(filename.to_string());
679                    }
680                }
681            }
682        }
683    }
684    None
685}
686
687fn extract_rfc2231_filename(value: &str) -> Option<String> {
688    let mut parts: Vec<(usize, String)> = Vec::new();
689    for param in value.split(';') {
690        let param = param.trim();
691        for prefix in &["filename*", "name*"] {
692            if let Some(rest) = param.strip_prefix(prefix) {
693                if let Some(eq_pos) = rest.find('=') {
694                    let num_part = &rest[..eq_pos];
695                    let val_part = &rest[eq_pos + 1..];
696                    let num_str = num_part.trim_end_matches('*');
697                    if let Ok(idx) = num_str.parse::<usize>() {
698                        let val = val_part.trim().trim_matches('"').trim_matches('\'');
699                        let decoded = if num_part.ends_with('*') {
700                            decode_rfc2231_value(val)
701                        } else {
702                            val.to_string()
703                        };
704                        parts.push((idx, decoded));
705                    }
706                }
707            }
708        }
709    }
710    if parts.is_empty() {
711        return None;
712    }
713    parts.sort_by_key(|(idx, _)| *idx);
714    let result: String = parts.into_iter().map(|(_, v)| v).collect();
715    if result.is_empty() {
716        None
717    } else {
718        Some(result)
719    }
720}
721
722fn extract_rfc2231_encoded_filename(value: &str) -> Option<String> {
723    for param in value.split(';') {
724        let param = param.trim();
725        for prefix in &["filename*=", "name*="] {
726            if let Some(rest) = param.strip_prefix(prefix) {
727                let val = rest.trim().trim_matches('"');
728                return Some(decode_rfc2231_value(val));
729            }
730        }
731    }
732    None
733}
734
735fn decode_rfc2231_value(value: &str) -> String {
736    let parts: Vec<&str> = value.splitn(3, '\'').collect();
737    if parts.len() == 3 {
738        let charset = parts[0];
739        let encoded = parts[2];
740        let decoded_bytes = percent_decode_bytes(encoded);
741        let encoding =
742            encoding_rs::Encoding::for_label(charset.as_bytes()).unwrap_or(encoding_rs::UTF_8);
743        let (result, _, _) = encoding.decode(&decoded_bytes);
744        result.into_owned()
745    } else {
746        let decoded_bytes = percent_decode_bytes(value);
747        String::from_utf8_lossy(&decoded_bytes).into_owned()
748    }
749}
750
/// Decodes `%XX` escapes in `input` into raw bytes.
///
/// Only a `%` followed by exactly two hexadecimal digits (either case) is an
/// escape. Anything else — a truncated escape at the end of the input or
/// non-hex characters such as a sign — is copied through unchanged.
fn percent_decode_bytes(input: &str) -> Vec<u8> {
    let bytes = input.as_bytes();
    let mut result = Vec::with_capacity(bytes.len());
    let mut i = 0;

    while i < bytes.len() {
        if bytes[i] == b'%' {
            // Check each digit individually: integer parsers such as
            // `from_str_radix` also accept a leading '+', which is not valid here.
            let hi = bytes.get(i + 1).and_then(|&b| char::from(b).to_digit(16));
            let lo = bytes.get(i + 2).and_then(|&b| char::from(b).to_digit(16));
            if let (Some(hi), Some(lo)) = (hi, lo) {
                // Both digits are below 16, so the sum always fits in a byte.
                result.push((hi * 16 + lo) as u8);
                i += 3;
                continue;
            }
        }
        result.push(bytes[i]);
        i += 1;
    }
    result
}
770
/// RFC 3676: Unwrap format=flowed text
///
/// * Lines ending with a space (SP) are flowed and are joined with the following
///   line, provided that line has the same quote depth (number of leading `>`).
/// * The quote prefix is written once per paragraph; on joined lines it is removed.
/// * A single leading space (space-stuffing) is removed from unquoted lines; after
///   a quote prefix it is kept as the separator between `>` and the text.
/// * Lines consisting of "-- " are signature separators: they are never flowed
///   and always end the paragraph before them.
///
/// Every output line ends with `\n`, except that a flowed last line leaves the
/// result ending in a space.
pub fn unflow_text(text: &str) -> String {
    let mut result = String::with_capacity(text.len());
    // Quote depth of the paragraph still being joined, if the previous line was flowed.
    let mut open_depth: Option<usize> = None;

    for line in text.lines() {
        // '>' is ASCII, so the byte count is a valid slice offset.
        let depth = line.bytes().take_while(|&b| b == b'>').count();
        let unquoted = &line[depth..];
        let content = unquoted.strip_prefix(' ').unwrap_or(unquoted);
        let is_signature = content == "-- ";

        let continues_paragraph = !is_signature && open_depth == Some(depth);
        if !continues_paragraph {
            if open_depth.take().is_some() {
                // The flowed paragraph was interrupted by a quote-depth change or
                // a signature separator: end it instead of joining.
                if result.ends_with(' ') {
                    result.pop();
                }
                result.push('\n');
            }
            if depth > 0 {
                // Quote marks plus the stuffing space, exactly as written.
                result.push_str(&line[..line.len() - content.len()]);
            }
        }

        if !is_signature && content.ends_with(' ') {
            result.push_str(content.trim_end_matches(' '));
            result.push(' ');
            open_depth = Some(depth);
        } else {
            result.push_str(content);
            result.push('\n');
            open_depth = None;
        }
    }
    result
}
795
796#[cfg(test)]
797mod tests {
798    use super::*;
799
800    #[test]
801    fn test_content_type_parse() {
802        let ct = ContentType::parse("text/plain; charset=utf-8");
803        assert_eq!(ct.type_, "text");
804        assert_eq!(ct.subtype, "plain");
805        assert_eq!(ct.charset(), Some("utf-8"));
806    }
807
808    #[test]
809    fn test_content_type_multipart() {
810        let ct = ContentType::parse("multipart/mixed; boundary=\"----=_Part_123\"");
811        assert!(ct.is_multipart());
812        assert_eq!(ct.boundary(), Some("----=_Part_123"));
813    }
814
815    #[test]
816    fn test_content_disposition() {
817        let cd = ContentDisposition::parse("attachment; filename=\"test.pdf\"");
818        assert!(cd.is_attachment());
819        assert_eq!(cd.filename(), Some("test.pdf"));
820    }
821
822    #[test]
823    fn test_content_disposition_inline() {
824        let cd = ContentDisposition::parse("inline");
825        assert!(!cd.is_attachment());
826    }
827
828    #[test]
829    fn test_base64_decode() {
830        let data = b"SGVsbG8gV29ybGQ=";
831        let decoded = decode_base64(data).unwrap();
832        assert_eq!(decoded, b"Hello World");
833    }
834
835    #[test]
836    fn test_base64_decode_with_newlines() {
837        let data = b"SGVs\nbG8g\nV29y\nbGQ=";
838        let decoded = decode_base64(data).unwrap();
839        assert_eq!(decoded, b"Hello World");
840    }
841
842    #[test]
843    fn test_quoted_printable_decode() {
844        let data = b"=48=C3=A5kan";
845        let decoded = decode_quoted_printable(data);
846        assert_eq!(std::str::from_utf8(&decoded).unwrap(), "Håkan");
847    }
848
849    #[test]
850    fn test_quoted_printable_soft_break() {
851        let data = b"line=\r\ncontinued";
852        let decoded = decode_quoted_printable(data);
853        assert_eq!(std::str::from_utf8(&decoded).unwrap(), "linecontinued");
854    }
855
856    #[test]
857    fn test_uuencode_simple() {
858        let data = b"begin 644 test.txt\n+5B5C(&%L9&%C\n`\nend\n";
859        let decoded = decode_uuencode(data);
860        assert!(decoded.is_some());
861        assert!(!decoded.unwrap().is_empty());
862    }
863
864    #[test]
865    fn test_parse_mime_info() {
866        let headers = vec![
867            ("content-type".to_string(), "text/plain; charset=iso-8859-1".to_string()),
868            ("content-transfer-encoding".to_string(), "quoted-printable".to_string()),
869        ];
870        let mi = parse_mime_info(&headers).unwrap();
871        assert_eq!(mi.content_type.charset(), Some("iso-8859-1"));
872        assert_eq!(mi.content_transfer_encoding.as_deref(), Some("quoted-printable"));
873    }
874
875    #[test]
876    fn test_parse_mime_info_no_cte() {
877        let headers = vec![("content-type".to_string(), "text/plain; charset=utf-8".to_string())];
878        let mi = parse_mime_info(&headers).unwrap();
879        assert_eq!(mi.content_type.charset(), Some("utf-8"));
880        assert!(mi.content_transfer_encoding.is_none());
881    }
882
883    #[test]
884    fn test_parse_mime_info_no_ct() {
885        let headers: Vec<(String, String)> = vec![("from".to_string(), "a@b.com".to_string())];
886        assert!(parse_mime_info(&headers).is_none());
887    }
888
889    #[test]
890    fn test_decode_body_base64() {
891        let headers = vec![
892            ("content-type".to_string(), "text/plain; charset=utf-8".to_string()),
893            ("content-transfer-encoding".to_string(), "base64".to_string()),
894        ];
895        let mi = parse_mime_info(&headers).unwrap();
896        let body = b"SGVsbG8gV29ybGQ=";
897        let decoded = decode_body(body, &mi);
898        assert_eq!(decoded, "Hello World");
899    }
900
901    #[test]
902    fn test_decode_body_quoted_printable() {
903        let headers = vec![
904            ("content-type".to_string(), "text/plain; charset=utf-8".to_string()),
905            ("content-transfer-encoding".to_string(), "quoted-printable".to_string()),
906        ];
907        let mi = parse_mime_info(&headers).unwrap();
908        let body = b"Hello=20World=21";
909        let decoded = decode_body(body, &mi);
910        assert_eq!(decoded, "Hello World!");
911    }
912
913    #[test]
914    fn test_decode_body_7bit_passthrough() {
915        let headers = vec![("content-type".to_string(), "text/plain; charset=utf-8".to_string())];
916        let mi = parse_mime_info(&headers).unwrap();
917        let body = b"Hello World";
918        let decoded = decode_body(body, &mi);
919        assert_eq!(decoded, "Hello World");
920    }
921
922    #[test]
923    fn test_decode_body_charset_iso8859_1() {
924        let headers = vec![
925            ("content-type".to_string(), "text/plain; charset=iso-8859-1".to_string()),
926            ("content-transfer-encoding".to_string(), "quoted-printable".to_string()),
927        ];
928        let mi = parse_mime_info(&headers).unwrap();
929        // "H=E5kan" with iso-8859-1: å = 0xE5 = 229
930        let body = b"H=E5kan";
931        let decoded = decode_body(body, &mi);
932        assert_eq!(decoded, "Håkan");
933    }
934
935    #[test]
936    fn test_process_mime_body_no_mime() {
937        let headers = vec![("from".to_string(), "a@b.com".to_string())];
938        let (body, charset) = process_mime_body(&headers, b"Hello World");
939        assert_eq!(body, "Hello World");
940        assert!(charset.is_none());
941    }
942
943    #[test]
944    fn test_process_mime_body_with_charset() {
945        let headers =
946            vec![("content-type".to_string(), "text/plain; charset=iso-8859-1".to_string())];
947        let (body, charset) = process_mime_body(&headers, b"Hello");
948        assert_eq!(body, "Hello");
949        assert_eq!(charset.as_deref(), Some("iso-8859-1"));
950    }
951
952    #[test]
953    fn test_decode_body_iso_8859_7_kalimera() {
954        let headers =
955            vec![("content-type".to_string(), "text/plain; charset=iso-8859-7".to_string())];
956        let mi = parse_mime_info(&headers).unwrap();
957        // "Καλημερα" in ISO-8859-7: Κ=0xCA α=0xE1 λ=0xEB η=0xE7 μ=0xEC ε=0xE5 ρ=0xF1 α=0xE1
958        let body = b"\xCA\xE1\xEB\xE7\xEC\xE5\xF1\xE1";
959        let decoded = decode_body(body, &mi);
960        assert_eq!(decoded, "Καλημερα");
961    }
962
963    #[test]
964    fn test_decode_body_windows_1253_kalimera() {
965        let headers =
966            vec![("content-type".to_string(), "text/plain; charset=windows-1253".to_string())];
967        let mi = parse_mime_info(&headers).unwrap();
968        // "Καλημερα" in Windows-1253 (same code points for unaccented Greek)
969        let body = b"\xCA\xE1\xEB\xE7\xEC\xE5\xF1\xE1";
970        let decoded = decode_body(body, &mi);
971        assert_eq!(decoded, "Καλημερα");
972    }
973
974    #[test]
975    fn test_decode_body_iso_8859_7_tonos() {
976        let headers =
977            vec![("content-type".to_string(), "text/plain; charset=iso-8859-7".to_string())];
978        let mi = parse_mime_info(&headers).unwrap();
979        // "άνθρωπος" in ISO-8859-7: ά=0xDC ν=0xED θ=0xE8 ρ=0xF1 ω=0xF9 π=0xF0 ο=0xEF ς=0xF2
980        let body = b"\xDC\xED\xE8\xF1\xF9\xF0\xEF\xF2";
981        let decoded = decode_body(body, &mi);
982        assert_eq!(decoded, "άνθρωπος");
983    }
984
985    #[test]
986    fn test_decode_body_windows_1253_tonos() {
987        let headers =
988            vec![("content-type".to_string(), "text/plain; charset=windows-1253".to_string())];
989        let mi = parse_mime_info(&headers).unwrap();
990        // "άνθρωπος" in Windows-1253: ά=0xDC ν=0xED θ=0xE8 ρ=0xF1 ω=0xF9 π=0xF0 ο=0xEF ς=0xF2
991        let body = b"\xDC\xED\xE8\xF1\xF9\xF0\xEF\xF2";
992        let decoded = decode_body(body, &mi);
993        assert_eq!(decoded, "άνθρωπος");
994    }
995
996    #[test]
997    fn test_decode_body_no_charset_iso_8859_7_fallback() {
998        let headers = vec![("content-type".to_string(), "text/plain".to_string())];
999        let mi = parse_mime_info(&headers).unwrap();
1000        // "Καλημερα" in ISO-8859-7
1001        let body = b"\xCA\xE1\xEB\xE7\xEC\xE5\xF1\xE1";
1002        let decoded = decode_body(body, &mi);
1003        assert_eq!(decoded, "Καλημερα");
1004    }
1005
1006    #[test]
1007    fn test_decode_body_iso_8859_7_quoted_printable() {
1008        let headers = vec![
1009            ("content-type".to_string(), "text/plain; charset=iso-8859-7".to_string()),
1010            ("content-transfer-encoding".to_string(), "quoted-printable".to_string()),
1011        ];
1012        let mi = parse_mime_info(&headers).unwrap();
1013        // QP-encoded "Καλημερα": Κ=CA α=E1 λ=EB η=E7 μ=EC ε=E5 ρ=F1 α=E1
1014        let body = b"\xCA=E1=EB=E7=EC=E5=F1=E1";
1015        let decoded = decode_body(body, &mi);
1016        assert_eq!(decoded, "Καλημερα");
1017    }
1018
1019    #[test]
1020    fn test_decode_body_iso_8859_7_base64() {
1021        let headers = vec![
1022            ("content-type".to_string(), "text/plain; charset=iso-8859-7".to_string()),
1023            ("content-transfer-encoding".to_string(), "base64".to_string()),
1024        ];
1025        let mi = parse_mime_info(&headers).unwrap();
1026        // Base64 of ISO-8859-7 "Καλημερα" (bytes: CAE1EBE7ECE5F1E1)
1027        let body = b"yuHr5+zl8eE=";
1028        let decoded = decode_body(body, &mi);
1029        assert_eq!(decoded, "Καλημερα");
1030    }
1031
1032    #[test]
1033    fn test_find_multipart_charset_second_part() {
1034        // Multipart where first part has no charset, second part does
1035        let boundary = "----=_NextPart_000_1234";
1036        let body = "------=_NextPart_000_1234\n\
1037             Content-Type: text/plain; format=flowed\n\
1038             \n\
1039             Some plain text\n\
1040             \n\
1041             ------=_NextPart_000_1234\n\
1042             Content-Type: text/html; charset=\"iso-8859-7\"\n\
1043             \n\
1044             <p>Some text</p>\n\
1045             \n\
1046             ------=_NextPart_000_1234--\n"
1047            .to_string();
1048        let result = find_multipart_charset(body.as_bytes(), boundary);
1049        assert_eq!(result.as_deref(), Some("iso-8859-7"));
1050    }
1051
1052    #[test]
1053    fn test_find_multipart_charset_all_parts_no_charset() {
1054        // Multipart where NO part has a charset
1055        let boundary = "----=_NextPart_000_5678";
1056        let body = "------=_NextPart_000_5678\n\
1057             Content-Type: text/plain; format=flowed\n\
1058             \n\
1059             First part\n\
1060             \n\
1061             ------=_NextPart_000_5678\n\
1062             Content-Type: text/plain\n\
1063             \n\
1064             Second part\n\
1065             \n\
1066             ------=_NextPart_000_5678--\n"
1067            .to_string();
1068        let result = find_multipart_charset(body.as_bytes(), boundary);
1069        assert!(result.is_none());
1070    }
1071
1072    #[test]
1073    fn test_process_mime_body_multipart_charset_in_second_part() {
1074        let headers = vec![(
1075            "content-type".to_string(),
1076            "multipart/mixed; boundary=\"----=_NextPart_000_9999\"".to_string(),
1077        )];
1078        let body = b"------=_NextPart_000_9999\n\
1079             Content-Type: text/plain; format=flowed\n\
1080             Content-Transfer-Encoding: 8bit\n\
1081             \n\
1082             Hello\n\
1083             \n\
1084             ------=_NextPart_000_9999\n\
1085             Content-Type: text/html; charset=\"iso-8859-7\"\n\
1086             Content-Transfer-Encoding: 8bit\n\
1087             \n\
1088             \xCB\xE1\xEC\xE7\xED\xE5\xF1\xE1\n\
1089             \n\
1090             ------=_NextPart_000_9999--\n";
1091        let charset = resolve_charset(body, &parse_mime_info(&headers).unwrap());
1092        assert_eq!(
1093            charset.as_deref(),
1094            Some("iso-8859-7"),
1095            "Should detect charset from second part when first part lacks it"
1096        );
1097    }
1098
1099    #[test]
1100    fn test_decode_body_greek_utf8() {
1101        let headers = vec![("content-type".to_string(), "text/plain; charset=utf-8".to_string())];
1102        let mi = parse_mime_info(&headers).unwrap();
1103        let body = "Καλημερα".as_bytes();
1104        let decoded = decode_body(body, &mi);
1105        assert_eq!(decoded, "Καλημερα");
1106    }
1107
1108    #[test]
1109    fn test_process_mime_body_no_ct_greek_fallback() {
1110        // No Content-Type header, but body has Greek ISO-8859-7 bytes
1111        let headers = vec![("from".to_string(), "a@b.com".to_string())];
1112        // "Γεια" in ISO-8859-7: Γ=0xC3 ε=0xE5 ι=0xE9 α=0xE1
1113        let body = b"\xC3\xE5\xE9\xE1";
1114        let (decoded, charset) = process_mime_body(&headers, body);
1115        assert!(!decoded.contains('\u{FFFD}'), "Should decode Greek without replacement chars");
1116        // Should have detected a charset from fallbacks
1117        assert!(charset.is_some(), "Should report a detected charset");
1118        assert_eq!(decoded, "Γεια");
1119    }
1120
1121    // Additional comprehensive Greek charset tests
1122
1123    #[test]
1124    fn test_decode_body_uppercase_tonos_iso_8859_7() {
1125        // Test uppercase Greek with tonos: "Άνθρωπος" (capital Ά)
1126        // ISO-8859-7: Ά=0xB6 ν=0xED θ=0xE8 ρ=0xF1 ω=0xF9 π=0xF0 ο=0xEF ς=0xF2
1127        let headers =
1128            vec![("content-type".to_string(), "text/plain; charset=iso-8859-7".to_string())];
1129        let mi = parse_mime_info(&headers).unwrap();
1130        let body = b"\xB6\xED\xE8\xF1\xF9\xF0\xEF\xF2";
1131        let decoded = decode_body(body, &mi);
1132        assert_eq!(decoded, "Άνθρωπος");
1133    }
1134
1135    #[test]
1136    fn test_decode_body_uppercase_tonos_windows_1253() {
1137        // Test uppercase Greek with tonos: "Άνθρωπος" (capital Ά)
1138        // Windows-1253: Ά=0xA2 ν=0xED θ=0xE8 ρ=0xF1 ω=0xF9 π=0xF0 ο=0xEF ς=0xF2
1139        let headers =
1140            vec![("content-type".to_string(), "text/plain; charset=windows-1253".to_string())];
1141        let mi = parse_mime_info(&headers).unwrap();
1142        let body = b"\xA2\xED\xE8\xF1\xF9\xF0\xEF\xF2";
1143        let decoded = decode_body(body, &mi);
1144        assert_eq!(decoded, "Άνθρωπος");
1145    }
1146
1147    #[test]
1148    fn test_decode_body_real_world_greek_phrase() {
1149        // Real-world phrase: "Καλό απόγευμα" (Good afternoon)
1150        // ISO-8859-7: Κ=0xCA α=0xE1 λ=0xEB ό=0xFC <space> α=0xE1 π=0xF0 ό=0xFC γ=0xE3 ε=0xE5 υ=0xF5 μ=0xEC α=0xE1
1151        let headers =
1152            vec![("content-type".to_string(), "text/plain; charset=iso-8859-7".to_string())];
1153        let mi = parse_mime_info(&headers).unwrap();
1154        let body = b"\xCA\xE1\xEB\xFC\x20\xE1\xF0\xFC\xE3\xE5\xF5\xEC\xE1";
1155        let decoded = decode_body(body, &mi);
1156        assert_eq!(decoded, "Καλό απόγευμα");
1157    }
1158
1159    #[test]
1160    fn test_decode_body_mixed_greek_latin() {
1161        // Mixed text: "Hello Κόσμε!" (Hello World! in mixed Greek/Latin)
1162        // UTF-8 encoding for the Greek part
1163        let headers = vec![("content-type".to_string(), "text/plain; charset=utf-8".to_string())];
1164        let mi = parse_mime_info(&headers).unwrap();
1165        let body = "Hello Κόσμε!".as_bytes();
1166        let decoded = decode_body(body, &mi);
1167        assert_eq!(decoded, "Hello Κόσμε!");
1168    }
1169
1170    #[test]
1171    fn test_decode_body_question_marks_greek() {
1172        // Greek semicolon (U+037E) looks like ";" and question mark is ";"
1173        // "Πώς είσαι;" (How are you?)
1174        // ISO-8859-7: Π=0xD0 ώ=0xFE ς=0xF2 <space> ε=0xE5 ί=0xDF σ=0xF3 α=0xE1 ι=0xE9 ;
1175        let headers =
1176            vec![("content-type".to_string(), "text/plain; charset=iso-8859-7".to_string())];
1177        let mi = parse_mime_info(&headers).unwrap();
1178        let body = b"\xD0\xFE\xF2\x20\xE5\xDF\xF3\xE1\xE9;";
1179        let decoded = decode_body(body, &mi);
1180        assert_eq!(decoded, "Πώς είσαι;");
1181    }
1182
1183    #[test]
1184    fn test_find_multipart_charset_mixed_encodings() {
1185        // Multipart with first part in UTF-8 (no charset param), second in ISO-8859-7
1186        let boundary = "----=_Part_123";
1187        let body = "------=_Part_123\n\
1188             Content-Type: text/plain\n\
1189             \n\
1190             English text\n\
1191             \n\
1192             ------=_Part_123\n\
1193             Content-Type: text/html; charset=\"iso-8859-7\"\n\
1194             \n\
1195             <p>Greek text</p>\n\
1196             \n\
1197             ------=_Part_123--\n"
1198            .to_string();
1199        let result = find_multipart_charset(body.as_bytes(), boundary);
1200        assert_eq!(
1201            result.as_deref(),
1202            Some("iso-8859-7"),
1203            "Should find charset from second part even when first part has none"
1204        );
1205    }
1206
1207    #[test]
1208    fn test_process_mime_body_multipart_with_greek_html() {
1209        // Real-world scenario: multipart/alternative with Greek HTML
1210        let headers = vec![(
1211            "content-type".to_string(),
1212            "multipart/alternative; boundary=\"----=_NextPart_000_1111\"".to_string(),
1213        )];
1214        let body = b"------=_NextPart_000_1111\n\
1215             Content-Type: text/plain; charset=\"iso-8859-7\"\n\
1216             \n\
1217             \xCA\xE1\xEB\xE7\xEC\xE5\xF1\xE1\n\
1218             \n\
1219             ------=_NextPart_000_1111\n\
1220             Content-Type: text/html; charset=\"iso-8859-7\"\n\
1221             \n\
1222             <html><body>\xCA\xE1\xEB\xE7\xEC\xE5\xF1\xE1</body></html>\n\
1223             \n\
1224             ------=_NextPart_000_1111--\n";
1225        let (decoded, charset) = process_mime_body(&headers, body);
1226        assert_eq!(charset.as_deref(), Some("iso-8859-7"));
1227        // Should decode Greek correctly from first text/plain part
1228        assert!(decoded.contains("Καλημερα"), "Should contain decoded Greek text");
1229        assert!(!decoded.contains('\u{FFFD}'), "Should not have replacement characters");
1230    }
1231
1232    #[test]
1233    fn test_decode_body_all_greek_letters_iso_8859_7() {
1234        // Test basic Greek alphabet (lowercase): α β γ δ ε
1235        // ISO-8859-7: α=0xE1 β=0xE2 γ=0xE3 δ=0xE4 ε=0xE5
1236        let headers =
1237            vec![("content-type".to_string(), "text/plain; charset=iso-8859-7".to_string())];
1238        let mi = parse_mime_info(&headers).unwrap();
1239        let body = b"\xE1\xE2\xE3\xE4\xE5";
1240        let decoded = decode_body(body, &mi);
1241        assert_eq!(decoded, "αβγδε");
1242    }
1243
1244    #[test]
1245    fn test_decode_body_all_greek_letters_windows_1253() {
1246        // Test basic Greek alphabet (uppercase): Α Β Γ Δ Ε
1247        // Windows-1253: Α=0xC1 Β=0xC2 Γ=0xC3 Δ=0xC4 Ε=0xC5
1248        let headers =
1249            vec![("content-type".to_string(), "text/plain; charset=windows-1253".to_string())];
1250        let mi = parse_mime_info(&headers).unwrap();
1251        let body = b"\xC1\xC2\xC3\xC4\xC5";
1252        let decoded = decode_body(body, &mi);
1253        assert_eq!(decoded, "ΑΒΓΔΕ");
1254    }
1255
1256    #[test]
1257    fn test_decode_body_diaeresis_greek() {
1258        // Test Greek with diaeresis: "ϊδιος" (same, with diaeresis on iota)
1259        // ISO-8859-7: ϊ=0xFA δ=0xE4 ι=0xE9 ο=0xEF ς=0xF2
1260        let headers =
1261            vec![("content-type".to_string(), "text/plain; charset=iso-8859-7".to_string())];
1262        let mi = parse_mime_info(&headers).unwrap();
1263        let body = b"\xFA\xE4\xE9\xEF\xF2";
1264        let decoded = decode_body(body, &mi);
1265        assert_eq!(decoded, "ϊδιος");
1266    }
1267
1268    #[test]
1269    fn test_multipart_inline_image() {
1270        // Test multipart message with inline image (Content-Disposition: inline)
1271        let headers = vec![(
1272            "content-type".to_string(),
1273            "multipart/mixed; boundary=\"----=_Part_123\"".to_string(),
1274        )];
1275
1276        // Create a small 1x1 red pixel GIF
1277        let gif_bytes = b"R0lGODlhAQABAIAAAP8AAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7";
1278
1279        let body = format!(
1280            "------=_Part_123\n\
1281             Content-Type: text/plain; charset=utf-8\n\
1282             \n\
1283             Hello world\n\
1284             \n\
1285             ------=_Part_123\n\
1286             Content-Type: image/gif; name=\"pixel.gif\"\n\
1287             Content-Disposition: inline; filename=\"pixel.gif\"\n\
1288             Content-Transfer-Encoding: base64\n\
1289             \n\
1290             {}\n\
1291             \n\
1292             ------=_Part_123--\n",
1293            std::str::from_utf8(gif_bytes).unwrap()
1294        );
1295
1296        let (decoded, _charset) = process_mime_body(&headers, body.as_bytes());
1297
1298        // Should contain the text part
1299        assert!(decoded.contains("Hello world"), "Should contain text content");
1300
1301        // Should contain inline image marker (not escaped HTML)
1302        assert!(
1303            decoded.contains("[INLINE_IMAGE:image/gif:"),
1304            "Should contain inline image marker"
1305        );
1306        assert!(
1307            decoded.contains("R0lGODlhAQABAIAAAP8AAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"),
1308            "Should contain base64 data in marker"
1309        );
1310
1311        // Should NOT show MIME boundaries
1312        assert!(!decoded.contains("------=_Part_123"), "Should not contain MIME boundaries");
1313    }
1314
1315    #[test]
1316    fn test_multipart_attachment_image() {
1317        // Images are always embedded inline in the HTML archive regardless of
1318        // Content-Disposition: attachment — browsers browse, they don't download.
1319        let headers = vec![(
1320            "content-type".to_string(),
1321            "multipart/mixed; boundary=\"----=_Part_456\"".to_string(),
1322        )];
1323
1324        let gif_bytes = b"R0lGODlhAQABAIAAAP8AAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7";
1325
1326        let body = format!(
1327            "------=_Part_456\n\
1328             Content-Type: text/plain; charset=utf-8\n\
1329             \n\
1330             See attached image\n\
1331             \n\
1332             ------=_Part_456\n\
1333             Content-Type: image/gif; name=\"chart.gif\"\n\
1334             Content-Disposition: attachment; filename=\"chart.gif\"\n\
1335             Content-Transfer-Encoding: base64\n\
1336             \n\
1337             {}\n\
1338             \n\
1339             ------=_Part_456--\n",
1340            std::str::from_utf8(gif_bytes).unwrap()
1341        );
1342
1343        let (decoded, _charset) = process_mime_body(&headers, body.as_bytes());
1344
1345        // Should contain the text part
1346        assert!(decoded.contains("See attached image"), "Should contain text content");
1347
1348        // Image must be embedded inline regardless of Content-Disposition: attachment
1349        assert!(
1350            decoded.contains("[INLINE_IMAGE:image/gif:"),
1351            "Should embed image inline even when Content-Disposition is attachment"
1352        );
1353        assert!(
1354            !decoded.contains("[Attachment: chart.gif]"),
1355            "Should NOT show attachment notation for images"
1356        );
1357    }
1358
1359    #[test]
1360    fn test_multipart_image_with_content_id() {
1361        // Test multipart message with image referenced by Content-ID (for HTML email)
1362        let headers = vec![(
1363            "content-type".to_string(),
1364            "multipart/related; boundary=\"----=_Part_789\"".to_string(),
1365        )];
1366
1367        let gif_bytes = b"R0lGODlhAQABAIAAAP8AAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7";
1368
1369        let body = format!(
1370            "------=_Part_789\n\
1371             Content-Type: text/html; charset=utf-8\n\
1372             \n\
1373             <html><body>Logo: <img src=\"cid:logo@example.com\"></body></html>\n\
1374             \n\
1375             ------=_Part_789\n\
1376             Content-Type: image/gif; name=\"logo.gif\"\n\
1377             Content-ID: <logo@example.com>\n\
1378             Content-Transfer-Encoding: base64\n\
1379             \n\
1380             {}\n\
1381             \n\
1382             ------=_Part_789--\n",
1383            std::str::from_utf8(gif_bytes).unwrap()
1384        );
1385
1386        let (decoded, _charset) = process_mime_body(&headers, body.as_bytes());
1387
1388        // Should contain HTML part
1389        assert!(decoded.contains("<html>"), "Should contain HTML content");
1390
1391        // Should contain inline image marker (since it has Content-ID)
1392        assert!(
1393            decoded.contains("[INLINE_IMAGE:image/gif:"),
1394            "Should contain inline image marker for Content-ID image"
1395        );
1396    }
1397
1398    #[test]
1399    fn test_multipart_pdf_attachment() {
1400        // Test multipart message with PDF attachment
1401        let headers = vec![(
1402            "content-type".to_string(),
1403            "multipart/mixed; boundary=\"----=_Part_PDF\"".to_string(),
1404        )];
1405
1406        let pdf_bytes = b"JVBERi0xLjQKJeLjz9M="; // Minimal PDF header in base64
1407
1408        let body = format!(
1409            "------=_Part_PDF\n\
1410             Content-Type: text/plain; charset=utf-8\n\
1411             \n\
1412             Please review the attached document.\n\
1413             \n\
1414             ------=_Part_PDF\n\
1415             Content-Type: application/pdf; name=\"report.pdf\"\n\
1416             Content-Disposition: attachment; filename=\"report.pdf\"\n\
1417             Content-Transfer-Encoding: base64\n\
1418             \n\
1419             {}\n\
1420             \n\
1421             ------=_Part_PDF--\n",
1422            std::str::from_utf8(pdf_bytes).unwrap()
1423        );
1424
1425        let (decoded, _charset) = process_mime_body(&headers, body.as_bytes());
1426
1427        // Should contain the text part
1428        assert!(decoded.contains("Please review"), "Should contain text content");
1429
1430        // Should show attachment notation for PDF
1431        assert!(
1432            decoded.contains("[Attachment: report.pdf]"),
1433            "Should show PDF attachment notation"
1434        );
1435
1436        // Should NOT show raw base64 PDF data
1437        assert!(!decoded.contains("JVBERi0xLjQKJeLjz9M="), "Should not contain raw PDF base64");
1438        assert!(!decoded.contains("application/pdf"), "Should not show content-type in output");
1439    }
1440
1441    #[test]
1442    fn test_multipart_mixed_inline_and_attachment() {
1443        // Both inline and attachment-disposition images are now always embedded inline.
1444        let headers = vec![(
1445            "content-type".to_string(),
1446            "multipart/mixed; boundary=\"----=_Part_MIX\"".to_string(),
1447        )];
1448
1449        let gif_bytes = b"R0lGODlhAQABAIAAAP8AAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7";
1450
1451        let body = format!(
1452            "------=_Part_MIX\n\
1453             Content-Type: text/plain; charset=utf-8\n\
1454             \n\
1455             Email body text\n\
1456             \n\
1457             ------=_Part_MIX\n\
1458             Content-Type: image/gif; name=\"inline.gif\"\n\
1459             Content-Disposition: inline; filename=\"inline.gif\"\n\
1460             Content-Transfer-Encoding: base64\n\
1461             \n\
1462             {}\n\
1463             \n\
1464             ------=_Part_MIX\n\
1465             Content-Type: image/jpeg; name=\"photo.jpg\"\n\
1466             Content-Disposition: attachment; filename=\"photo.jpg\"\n\
1467             Content-Transfer-Encoding: base64\n\
1468             \n\
1469             {}\n\
1470             \n\
1471             ------=_Part_MIX--\n",
1472            std::str::from_utf8(gif_bytes).unwrap(),
1473            std::str::from_utf8(gif_bytes).unwrap()
1474        );
1475
1476        let (decoded, _charset) = process_mime_body(&headers, body.as_bytes());
1477
1478        // Should contain text
1479        assert!(decoded.contains("Email body text"), "Should contain text content");
1480
1481        // Both images must be embedded inline — disposition is irrelevant for archives
1482        let inline_count = decoded.matches("[INLINE_IMAGE:").count();
1483        assert_eq!(inline_count, 2, "Both images (inline + attachment) should be embedded");
1484
1485        assert!(!decoded.contains("[Attachment: "), "No image should remain as attachment link");
1486    }
1487
1488    #[test]
1489    fn test_multipart_greek_text_with_inline_image() {
1490        // Real-world test: Greek text with inline image
1491        let headers = vec![(
1492            "content-type".to_string(),
1493            "multipart/mixed; boundary=\"----=_Part_GR\"".to_string(),
1494        )];
1495
1496        // "Γεια σου" in ISO-8859-7: Γ=0xC3 ε=0xE5 ι=0xE9 α=0xE1 space σ=0xF3 ο=0xEF υ=0xF5
1497        let greek_text = vec![0xC3u8, 0xE5, 0xE9, 0xE1, 0x20, 0xF3, 0xEF, 0xF5];
1498        let gif_bytes = b"R0lGODlhAQABAIAAAP8AAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7";
1499
1500        let mut body =
1501            b"------=_Part_GR\nContent-Type: text/plain; charset=iso-8859-7\n\n".to_vec();
1502        body.extend_from_slice(&greek_text);
1503        body.extend_from_slice(b"\n\n------=_Part_GR\n");
1504        body.extend_from_slice(b"Content-Type: image/gif; name=\"icon.gif\"\n");
1505        body.extend_from_slice(b"Content-Disposition: inline; filename=\"icon.gif\"\n");
1506        body.extend_from_slice(b"Content-Transfer-Encoding: base64\n\n");
1507        body.extend_from_slice(gif_bytes);
1508        body.extend_from_slice(b"\n\n------=_Part_GR--\n");
1509
1510        let (decoded, charset) = process_mime_body(&headers, &body);
1511
1512        // Should detect ISO-8859-7 charset
1513        assert_eq!(charset.as_deref(), Some("iso-8859-7"), "Should detect Greek charset");
1514
1515        // Should contain decoded Greek text
1516        assert!(decoded.contains("Γεια σου"), "Should contain decoded Greek text");
1517
1518        // Should contain inline image marker
1519        assert!(
1520            decoded.contains("[INLINE_IMAGE:image/gif:"),
1521            "Should contain inline image marker"
1522        );
1523
1524        // Should NOT have mojibake or replacement characters
1525        assert!(!decoded.contains('\u{FFFD}'), "Should not have replacement characters");
1526    }
1527
1528    #[test]
1529    fn test_decode_body_mislabeled_iso_8859_1_as_greek() {
1530        // Real-world case: Body labeled as iso-8859-1 but contains Greek (iso-8859-7)
1531        // Greek text: "Σωστά όλα αυτά" (Correct, all that)
1532        // In iso-8859-7: Σ=0xD3 ω=0xF9 σ=0xF3 τ=0xF4 ά=0xDC space=0x20 ό=0xFC λ=0xEB α=0xE1
1533        let headers =
1534            vec![("content-type".to_string(), "text/plain; charset=iso-8859-1".to_string())];
1535        let mi = parse_mime_info(&headers).unwrap();
1536        let body = b"\xD3\xF9\xF3\xF4\xDC\x20\xFC\xEB\xE1\x20\xE1\xF5\xF4\xDC";
1537
1538        let decoded = decode_body(body, &mi);
1539
1540        // Should auto-detect Greek despite iso-8859-1 label
1541        assert!(
1542            decoded.contains("Σωστά") || decoded.contains("ωστά"),
1543            "Should detect Greek in mislabeled iso-8859-1 body: got '{}'",
1544            decoded
1545        );
1546
1547        // Should NOT have mojibake
1548        assert!(!decoded.contains("ÓùóôÜ"), "Should not have mojibake: got '{}'", decoded);
1549    }
1550
1551    #[test]
1552    fn test_rfc2231_continuation_filename() {
1553        let headers = vec![(
1554            "content-disposition".to_string(),
1555            "attachment; filename*0=\"very_long_\"; filename*1=\"filename.pdf\"".to_string(),
1556        )];
1557        assert_eq!(extract_filename(&headers), Some("very_long_filename.pdf".to_string()));
1558    }
1559
1560    #[test]
1561    fn test_rfc2231_encoded_filename() {
1562        let headers = vec![(
1563            "content-disposition".to_string(),
1564            "attachment; filename*=utf-8''%C3%A9tude.pdf".to_string(),
1565        )];
1566        assert_eq!(extract_filename(&headers), Some("étude.pdf".to_string()));
1567    }
1568
1569    #[test]
1570    fn test_format_flowed_unwrap() {
1571        let input = "This is a long \nline that was wrapped.\n\nNew paragraph.\n";
1572        let expected = "This is a long line that was wrapped.\n\nNew paragraph.\n";
1573        assert_eq!(unflow_text(input), expected);
1574    }
1575
1576    #[test]
1577    fn test_format_flowed_signature_not_unwrapped() {
1578        let input = "Hello \nworld.\n-- \nSignature\n";
1579        let expected = "Hello world.\n-- \nSignature\n";
1580        assert_eq!(unflow_text(input), expected);
1581    }
1582
1583    #[test]
1584    fn test_decode_body_correct_iso_8859_1_latin_preserved() {
1585        // Verify that actual Latin-1 content is NOT incorrectly "fixed" to Greek
1586        // French: "Café résumé"
1587        let headers =
1588            vec![("content-type".to_string(), "text/plain; charset=iso-8859-1".to_string())];
1589        let mi = parse_mime_info(&headers).unwrap();
1590        let body = b"Caf\xE9 r\xE9sum\xE9";
1591
1592        let decoded = decode_body(body, &mi);
1593
1594        // Should preserve correct Latin-1
1595        assert_eq!(
1596            decoded, "Café résumé",
1597            "Should preserve correct Latin-1 text: got '{}'",
1598            decoded
1599        );
1600    }
1601
1602    #[test]
1603    fn test_content_type_is_text() {
1604        let ct = ContentType::parse("text/html; charset=utf-8");
1605        assert!(ct.is_text());
1606        assert!(!ct.is_multipart());
1607    }
1608
1609    #[test]
1610    fn test_content_type_full_type() {
1611        let ct = ContentType::parse("application/pdf");
1612        assert_eq!(ct.full_type(), "application/pdf");
1613    }
1614
1615    #[test]
1616    fn test_content_type_name_param() {
1617        let ct = ContentType::parse("image/jpeg; name=\"photo.jpg\"");
1618        assert_eq!(ct.name(), Some("photo.jpg"));
1619    }
1620
1621    #[test]
1622    fn test_content_type_no_subtype() {
1623        let ct = ContentType::parse("text");
1624        assert_eq!(ct.type_, "text");
1625        assert_eq!(ct.subtype, "");
1626    }
1627
1628    #[test]
1629    fn test_content_disposition_no_filename() {
1630        let cd = ContentDisposition::parse("inline");
1631        assert_eq!(cd.filename(), None);
1632        assert!(!cd.is_attachment());
1633    }
1634
1635    #[test]
1636    fn test_decode_quoted_printable_underscore_as_space() {
1637        let data = b"Hello_World";
1638        let decoded = decode_quoted_printable(data);
1639        assert_eq!(std::str::from_utf8(&decoded).unwrap(), "Hello World");
1640    }
1641
1642    #[test]
1643    fn test_decode_uuencode_no_begin() {
1644        let data = b"not a uuencoded block";
1645        let result = decode_uuencode(data);
1646        assert!(result.is_none());
1647    }
1648
1649    #[test]
1650    fn test_unflow_text_no_trailing_space() {
1651        let input = "Line one.\nLine two.\n";
1652        let result = unflow_text(input);
1653        assert_eq!(result, "Line one.\nLine two.\n");
1654    }
1655
1656    #[test]
1657    fn test_process_mime_body_format_flowed() {
1658        let headers = vec![(
1659            "content-type".to_string(),
1660            "text/plain; charset=utf-8; format=flowed".to_string(),
1661        )];
1662        let body = b"This is a long \nline that flows.\n";
1663        let (decoded, _) = process_mime_body(&headers, body);
1664        assert!(decoded.contains("This is a long line that flows."), "got: {}", decoded);
1665    }
1666}
hypermail/mime.rs

hypermail/
mime.rs