1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use crate::config::Config;
5use crate::error::Result;
6use crate::message::EmailInfo;
7use crate::structs::EmailStore;
8
/// Inverted index mapping a two-character string (bigram) to the positions
/// of the emails whose searchable text contains that bigram.
///
/// Positions are the enumeration indices of `EmailStore::emails` at the time
/// the index was built by `build_bigram_index`.
type BigramIndex = HashMap<String, Vec<usize>>;
10
11pub fn build_bigram_index(store: &EmailStore) -> BigramIndex {
12 let mut index: BigramIndex = HashMap::new();
13
14 for (i, email) in store.emails.iter().enumerate() {
15 let text = extract_searchable_text(email);
16 let bigrams = extract_bigrams(&text);
17 for bigram in bigrams {
18 index.entry(bigram).or_default().push(i);
19 }
20 }
21
22 index
23}
24
25pub fn search_bigrams(query: &str, index: &BigramIndex, _store: &EmailStore) -> Vec<usize> {
26 let query_bigrams = extract_bigrams(query);
27 if query_bigrams.is_empty() {
28 return Vec::new();
29 }
30
31 let mut result_scores: HashMap<usize, usize> = HashMap::new();
32
33 for bigram in &query_bigrams {
34 if let Some(entries) = index.get(bigram) {
35 for &idx in entries {
36 *result_scores.entry(idx).or_default() += 1;
37 }
38 }
39 }
40
41 let threshold = query_bigrams.len().saturating_sub(1);
42 let mut results: Vec<(usize, usize)> =
43 result_scores.into_iter().filter(|(_, score)| *score >= threshold).collect();
44
45 results.sort_by_key(|b| std::cmp::Reverse(b.1));
46
47 results.into_iter().map(|(idx, _)| idx).collect()
48}
49
50fn extract_searchable_text(email: &EmailInfo) -> String {
51 let mut text = String::new();
52
53 if let Some(ref subject) = email.subject {
54 text.push_str(subject);
55 text.push(' ');
56 }
57 if let Some(ref name) = email.name {
58 text.push_str(name);
59 text.push(' ');
60 }
61 if let Some(ref addr) = email.email_addr {
62 text.push_str(addr);
63 text.push(' ');
64 }
65
66 for body in &email.bodylist.bodies {
67 if !body.attached && !body.header {
68 text.push_str(&body.line);
69 text.push(' ');
70 }
71 }
72
73 text
74}
75
/// Returns the distinct bigrams (pairs of adjacent characters) of `text`.
///
/// The text is lowercased and every character that is neither alphanumeric
/// nor whitespace is dropped *before* pairing, so characters separated only
/// by punctuation become adjacent. Whitespace is kept and takes part in
/// bigrams. The order of the returned vector is unspecified.
fn extract_bigrams(text: &str) -> Vec<String> {
    let normalized = text.to_lowercase();
    // Fresh iterator over the characters that take part in bigrams.
    let kept = || {
        normalized
            .chars()
            .filter(|ch| ch.is_alphanumeric() || ch.is_whitespace())
    };

    // Pair every kept character with its successor; the set removes repeats.
    let unique: HashSet<String> = kept()
        .zip(kept().skip(1))
        .map(|(first, second)| {
            let mut pair = String::with_capacity(first.len_utf8() + second.len_utf8());
            pair.push(first);
            pair.push(second);
            pair
        })
        .collect();

    unique.into_iter().collect()
}
89
90pub fn write_search_index(store: &EmailStore, config: &Config) -> Result<String> {
91 let dir = config.dir.as_deref().unwrap_or(".");
92 let search_path = Path::new(dir).join("search_index.txt");
93
94 let mut content = String::new();
95 for email in &store.emails {
96 let text = extract_searchable_text(email);
97 let line = format!("{}|{}", email.msgnum, sanitize_for_search(&text));
98 content.push_str(&line);
99 content.push('\n');
100 }
101
102 std::fs::write(&search_path, &content)?;
103 Ok(content)
104}
105
/// Reduces `s` to the characters that are safe to store in one search-index
/// record.
///
/// Alphanumeric characters and `.`, `@`, `-` are kept unchanged. Every
/// whitespace character (including `\n`, `\r` and tabs) is replaced by a
/// single ASCII space, one for one, so the result never contains a line
/// break. All other characters, including the `|` field delimiter, are
/// dropped.
fn sanitize_for_search(s: &str) -> String {
    s.chars()
        .filter_map(|c| {
            if c.is_whitespace() {
                // The index is one record per line; an embedded line break
                // would split a record, so normalise all whitespace.
                Some(' ')
            } else if c.is_alphanumeric() || matches!(c, '.' | '@' | '-') {
                Some(c)
            } else {
                None
            }
        })
        .collect()
}
111
#[cfg(test)]
mod tests {
    use super::*;
    use crate::message::{Body, BodyChain, EmailInfo};

    /// Builds an email with the given number, subject and name, and a single
    /// plain (non-header, non-attachment) body line.
    fn make_email(msgnum: i32, subject: &str, name: &str, body_text: &str) -> EmailInfo {
        let only_body = Body {
            line: String::from(body_text),
            html: false,
            header: false,
            parsed_header: false,
            attached: false,
            demimed: false,
            msgnum,
        };

        EmailInfo {
            msgnum,
            subject: Some(String::from(subject)),
            name: Some(String::from(name)),
            bodylist: BodyChain {
                bodies: vec![only_body],
            },
            ..Default::default()
        }
    }

    /// Every adjacent character pair of a word is reported as a bigram.
    #[test]
    fn test_bigram_extraction() {
        let found = extract_bigrams("hello");
        for expected in ["he", "el", "ll", "lo"] {
            assert!(found.iter().any(|bigram| bigram.as_str() == expected));
        }
    }

    /// The index contains lowercased bigrams from subjects and bodies.
    #[test]
    fn test_bigram_index() {
        let mut store = EmailStore::new();
        store.add_email(make_email(1, "Hello World", "Alice", "This is a test"));
        store.add_email(make_email(2, "Goodbye World", "Bob", "Another test message"));

        let index = build_bigram_index(&store);
        for key in ["wo", "te"] {
            assert!(index.contains_key(key));
        }
    }

    /// A query matching only one email's subject returns just that email.
    #[test]
    fn test_search() {
        let mut store = EmailStore::new();
        store.add_email(make_email(1, "Hello World", "Alice", "This is a test message"));
        store.add_email(make_email(2, "Goodbye World", "Bob", "Another different message"));

        let index = build_bigram_index(&store);
        let hits = search_bigrams("hello", &index, &store);

        assert_eq!(hits.len(), 1);
        assert_eq!(store.emails[hits[0]].msgnum, 1);
    }

    /// Subject, name and body text all appear in the searchable text.
    #[test]
    fn test_extract_searchable_text() {
        let email = make_email(1, "Test Subject", "Alice", "Body content");
        let searchable = extract_searchable_text(&email);

        for fragment in ["Test Subject", "Alice", "Body content"] {
            assert!(searchable.contains(fragment));
        }
    }
}