Skip to main content

hypermail/
i18n.rs

1use std::collections::HashMap;
2use std::sync::OnceLock;
3
/// Expands a locale code literal into a `(code, json_text)` pair.
///
/// The JSON text is embedded at compile time with `include_str!`, reading
/// `locale/<code>.json`. A missing file is therefore a build error rather
/// than a runtime failure.
macro_rules! loc {
    ($tag:literal) => {
        (
            $tag,
            include_str!(concat!("locale/", $tag, ".json")),
        )
    };
}
12
13static LOCALES: &[(&str, &str)] = &[
14    loc!("aa"),
15    loc!("ab"),
16    loc!("ae"),
17    loc!("af"),
18    loc!("ak"),
19    loc!("am"),
20    loc!("an"),
21    loc!("ar"),
22    loc!("as"),
23    loc!("av"),
24    loc!("ay"),
25    loc!("az"),
26    loc!("ba"),
27    loc!("be"),
28    loc!("bg"),
29    loc!("bi"),
30    loc!("bm"),
31    loc!("bn"),
32    loc!("bo"),
33    loc!("br"),
34    loc!("bs"),
35    loc!("ca"),
36    loc!("ce"),
37    loc!("ch"),
38    loc!("co"),
39    loc!("cr"),
40    loc!("cs"),
41    loc!("cu"),
42    loc!("cv"),
43    loc!("cy"),
44    loc!("da"),
45    loc!("de"),
46    loc!("dv"),
47    loc!("dz"),
48    loc!("ee"),
49    loc!("el"),
50    loc!("en"),
51    loc!("eo"),
52    loc!("es"),
53    loc!("et"),
54    loc!("eu"),
55    loc!("fa"),
56    loc!("ff"),
57    loc!("fi"),
58    loc!("fj"),
59    loc!("fo"),
60    loc!("fr"),
61    loc!("fy"),
62    loc!("ga"),
63    loc!("gd"),
64    loc!("gl"),
65    loc!("gn"),
66    loc!("grc"),
67    loc!("gu"),
68    loc!("gv"),
69    loc!("ha"),
70    loc!("he"),
71    loc!("hi"),
72    loc!("hr"),
73    loc!("hu"),
74    loc!("hy"),
75    loc!("hz"),
76    loc!("ia"),
77    loc!("id"),
78    loc!("ie"),
79    loc!("ig"),
80    loc!("ii"),
81    loc!("ik"),
82    loc!("io"),
83    loc!("is"),
84    loc!("it"),
85    loc!("iu"),
86    loc!("ja"),
87    loc!("jv"),
88    loc!("ka"),
89    loc!("kg"),
90    loc!("ki"),
91    loc!("kj"),
92    loc!("kk"),
93    loc!("kl"),
94    loc!("km"),
95    loc!("kn"),
96    loc!("ko"),
97    loc!("kr"),
98    loc!("ks"),
99    loc!("ku"),
100    loc!("kv"),
101    loc!("kw"),
102    loc!("ky"),
103    loc!("la"),
104    loc!("lb"),
105    loc!("lg"),
106    loc!("li"),
107    loc!("ln"),
108    loc!("lo"),
109    loc!("lt"),
110    loc!("lu"),
111    loc!("lv"),
112    loc!("mg"),
113    loc!("mh"),
114    loc!("mi"),
115    loc!("ml"),
116    loc!("mn"),
117    loc!("mr"),
118    loc!("ms"),
119    loc!("mt"),
120    loc!("my"),
121    loc!("na"),
122    loc!("nb"),
123    loc!("nd"),
124    loc!("ne"),
125    loc!("ng"),
126    loc!("nl"),
127    loc!("nn"),
128    loc!("nr"),
129    loc!("nv"),
130    loc!("ny"),
131    loc!("oc"),
132    loc!("oj"),
133    loc!("om"),
134    loc!("or"),
135    loc!("os"),
136    loc!("pa"),
137    loc!("pi"),
138    loc!("pl"),
139    loc!("ps"),
140    loc!("pt"),
141    loc!("qu"),
142    loc!("rm"),
143    loc!("rn"),
144    loc!("ro"),
145    loc!("ru"),
146    loc!("rw"),
147    loc!("sa"),
148    loc!("sc"),
149    loc!("sd"),
150    loc!("se"),
151    loc!("sg"),
152    loc!("si"),
153    loc!("sk"),
154    loc!("sl"),
155    loc!("sm"),
156    loc!("sn"),
157    loc!("so"),
158    loc!("sq"),
159    loc!("sr"),
160    loc!("ss"),
161    loc!("st"),
162    loc!("su"),
163    loc!("sv"),
164    loc!("sw"),
165    loc!("ta"),
166    loc!("te"),
167    loc!("tg"),
168    loc!("th"),
169    loc!("ti"),
170    loc!("tk"),
171    loc!("tl"),
172    loc!("tn"),
173    loc!("to"),
174    loc!("tr"),
175    loc!("ts"),
176    loc!("tt"),
177    loc!("tw"),
178    loc!("ty"),
179    loc!("ug"),
180    loc!("uk"),
181    loc!("ur"),
182    loc!("uz"),
183    loc!("va"),
184    loc!("ve"),
185    loc!("vi"),
186    loc!("vo"),
187    loc!("wa"),
188    loc!("wo"),
189    loc!("xh"),
190    loc!("yi"),
191    loc!("yo"),
192    loc!("za"),
193    loc!("zh"),
194    loc!("zh-tw"),
195    loc!("zu"),
196    // ISO 639-2 (not in 639-1)
197    loc!("grc"),
198    // IETF BCP 47 private-use (x-*) — fictional / constructed languages
199    loc!("x-dothraki"),
200    loc!("x-klingon"),
201    loc!("x-lojban"),
202    loc!("x-navii"),
203    loc!("x-quenya"),
204    loc!("x-sindarin"),
205    loc!("x-valyrian"),
206];
207
/// Normalise legacy and alias language tags to our canonical codes.
///
/// Alias matching is ASCII case-insensitive, so `"zh-HK"`, `"ZH-hk"` and
/// `"zh-hk"` all map to `"zh-tw"`. A tag with no alias entry is returned
/// unchanged, including its original letter case.
fn normalise(lang: &str) -> &str {
    // `(alias, canonical)` pairs. Aliases are stored lowercase and compared
    // case-insensitively, because callers commonly pass mixed-case tags such
    // as "zh-Hant" or "zh-HK".
    const ALIASES: &[(&str, &str)] = &[
        // Deprecated ISO 639-1 / common aliases
        ("gr", "el"), // informal alias for Greek
        ("no", "nb"), // Norwegian → Bokmål
        ("in", "id"), // old Indonesian code
        ("iw", "he"), // old Hebrew code
        ("ji", "yi"), // old Yiddish code
        ("jw", "jv"), // old Javanese code
        // Simplified-Chinese variants
        ("zh-cn", "zh"),
        ("zh-hans", "zh"),
        ("zh-sg", "zh"),
        // Traditional-Chinese variants
        ("zh-hant", "zh-tw"),
        ("zh-hk", "zh-tw"),
        // Identity entry: canonicalises mixed-case "zh-TW" so that it is not
        // later reduced to its base subtag "zh" (Simplified).
        ("zh-tw", "zh-tw"),
    ];

    ALIASES
        .iter()
        .find(|(alias, _)| lang.eq_ignore_ascii_case(alias))
        .map_or(lang, |&(_, canonical)| canonical)
}
227
228/// Pre-parsed locale maps, initialized once on first access.
229static PARSED_LOCALES: OnceLock<HashMap<&'static str, HashMap<String, String>>> = OnceLock::new();
230
231fn parsed_locales() -> &'static HashMap<&'static str, HashMap<String, String>> {
232    PARSED_LOCALES.get_or_init(|| {
233        let mut map = HashMap::with_capacity(LOCALES.len());
234        for &(code, json_str) in LOCALES {
235            let mut strings = HashMap::new();
236            if let Ok(serde_json::Value::Object(obj)) = serde_json::from_str(json_str) {
237                for (k, v) in obj {
238                    if k != "_comment" {
239                        if let serde_json::Value::String(s) = v {
240                            strings.insert(k, s);
241                        }
242                    }
243                }
244            }
245            map.insert(code, strings);
246        }
247        map
248    })
249}
250
251/// Internationalization lookup table for UI strings, loaded from embedded JSON locale files.
252pub struct I18n {
253    strings: &'static HashMap<String, String>,
254}
255
256impl I18n {
257    /// Create an `I18n` instance for the given BCP 47 language tag.
258    ///
259    /// Resolution order:
260    /// 1. Exact match after alias normalisation (e.g. `"el"`, `"x-klingon"`)
261    /// 2. Base subtag (e.g. `"pt-BR"` → `"pt"`)
262    /// 3. English fallback
263    pub fn new(language: &str) -> Self {
264        let lang = normalise(language);
265        let locales = parsed_locales();
266
267        let strings = locales
268            .get(lang)
269            .or_else(|| {
270                // strip subtag: "pt-BR" → "pt"
271                let base = lang.split('-').next().unwrap_or(lang);
272                if base != lang {
273                    locales.get(base)
274                } else {
275                    None
276                }
277            })
278            .or_else(|| locales.get("en"))
279            .expect("English locale must exist");
280
281        Self { strings }
282    }
283
284    /// Return the localised string for `key`, or `key` itself if not found.
285    pub fn get<'a>(&'a self, key: &'a str) -> &'a str {
286        self.strings.get(key).map(|s| s.as_str()).unwrap_or(key)
287    }
288
289    /// Return an iterator over all known language codes (for tooling / docs).
290    pub fn known_languages() -> impl Iterator<Item = &'static str> {
291        LOCALES.iter().map(|(code, _)| *code)
292    }
293}
294
295// ─── Tests ──────────────────────────────────────────────────────────────────
296
#[cfg(test)]
mod tests {
    use super::*;

    /// Keys every locale file must define with a non-empty string value:
    /// UI labels first, then abbreviated weekday and month names.
    const REQUIRED_KEYS: &[&str] = &[
        "From",
        "Date",
        "Subject",
        "Message-ID",
        "References",
        "In-Reply-To",
        "Attachment",
        "Author:",
        "Next",
        "Previous",
        "Index",
        "Thread",
        "Date Index",
        "Subject Index",
        "Author Index",
        "Thread Index",
        "search",
        "next",
        "previous",
        "no subject",
        "unknown author",
        "Article",
        "Deleted message",
        "Expired message",
        "[Deleted]",
        "[Expired]",
        "No messages found.",
        "Sun",
        "Mon",
        "Tue",
        "Wed",
        "Thu",
        "Fri",
        "Sat",
        "Jan",
        "Feb",
        "Mar",
        "Apr",
        "May",
        "Jun",
        "Jul",
        "Aug",
        "Sep",
        "Oct",
        "Nov",
        "Dec",
    ];

    // ── English sanity ────────────────────────────────────────────────────

    #[test]
    fn english_basic() {
        let i = I18n::new("en");
        assert_eq!(i.get("From"), "From:");
        assert_eq!(i.get("Subject"), "Subject:");
        assert_eq!(i.get("Index"), "Index");
    }

    // ── Unknown key falls back to the key itself ──────────────────────────

    #[test]
    fn unknown_key_returns_key() {
        let i = I18n::new("en");
        assert_eq!(i.get("NoSuchKey"), "NoSuchKey");
    }

    // ── Unknown language falls back to English ────────────────────────────

    #[test]
    fn unknown_language_falls_back_to_english() {
        let i = I18n::new("xx-unknown");
        assert_eq!(i.get("From"), "From:");
    }

    // ── Aliases ───────────────────────────────────────────────────────────

    #[test]
    fn alias_gr_resolves_to_el() {
        let gr = I18n::new("gr");
        let el = I18n::new("el");
        assert_eq!(gr.get("From"), el.get("From"));
        assert_eq!(gr.get("unknown author"), el.get("unknown author"));
    }

    #[test]
    fn alias_no_resolves_to_nb() {
        let no = I18n::new("no");
        let nb = I18n::new("nb");
        assert_eq!(no.get("From"), nb.get("From"));
    }

    #[test]
    fn alias_in_resolves_to_id() {
        let i_in = I18n::new("in");
        let i_id = I18n::new("id");
        assert_eq!(i_in.get("From"), i_id.get("From"));
    }

    #[test]
    fn alias_iw_resolves_to_he() {
        let iw = I18n::new("iw");
        let he = I18n::new("he");
        assert_eq!(iw.get("From"), he.get("From"));
    }

    #[test]
    fn alias_zh_cn_resolves_to_zh() {
        let cn = I18n::new("zh-cn");
        let zh = I18n::new("zh");
        assert_eq!(cn.get("From"), zh.get("From"));
    }

    #[test]
    fn alias_zh_hk_resolves_to_zh_tw() {
        let hk = I18n::new("zh-hk");
        let tw = I18n::new("zh-tw");
        assert_eq!(hk.get("From"), tw.get("From"));
    }

    /// Covers the alias entries that have no dedicated test above.
    #[test]
    fn remaining_aliases_resolve() {
        for (alias, canonical) in [
            ("ji", "yi"),
            ("jw", "jv"),
            ("zh-hans", "zh"),
            ("zh-sg", "zh"),
            ("zh-hant", "zh-tw"),
        ] {
            let via_alias = I18n::new(alias);
            let direct = I18n::new(canonical);
            for key in ["From", "Subject", "unknown author"] {
                assert_eq!(
                    via_alias.get(key),
                    direct.get(key),
                    "{alias} should resolve to {canonical} for key [{key}]"
                );
            }
        }
    }

    // ── Subtag stripping ──────────────────────────────────────────────────

    #[test]
    fn subtag_pt_br_resolves_to_pt() {
        let br = I18n::new("pt-BR");
        let pt = I18n::new("pt");
        assert_eq!(br.get("From"), pt.get("From"));
    }

    // ── Non-English languages are actually different from English ─────────

    #[test]
    fn greek_not_english() {
        let el = I18n::new("el");
        assert_ne!(el.get("From"), "From:");
        assert_ne!(el.get("unknown author"), "Unknown");
    }

    #[test]
    fn german_not_english() {
        let de = I18n::new("de");
        assert_ne!(de.get("From"), "From:");
    }

    // ── Ancient Greek ─────────────────────────────────────────────────────

    #[test]
    fn ancient_greek_loads() {
        let grc = I18n::new("grc");
        assert_ne!(grc.get("From"), "From:");
        // Must not fall back to English
        assert_ne!(grc.get("From"), I18n::new("en").get("From"));
    }

    // ── x-* fictional languages load and differ from English ─────────────

    #[test]
    fn x_klingon_loads() {
        let tlh = I18n::new("x-klingon");
        assert_ne!(tlh.get("search"), "Search");
    }

    #[test]
    fn x_quenya_loads() {
        let q = I18n::new("x-quenya");
        // `get` returns the key itself when the entry is missing, so this only
        // fails if the locale resolves to an explicitly empty "Index" value.
        assert_ne!(q.get("Index"), "");
    }

    #[test]
    fn x_lojban_loads() {
        let jbo = I18n::new("x-lojban");
        assert_ne!(jbo.get("From"), "From:");
    }

    // ── Every locale: all REQUIRED_KEYS present, no empty value ───────────

    #[test]
    fn all_locales_have_required_keys() {
        // Collect every problem first so one run reports all broken locales.
        let mut failures: Vec<String> = Vec::new();

        for (code, json_str) in LOCALES {
            let val: serde_json::Value = serde_json::from_str(json_str)
                .unwrap_or_else(|e| panic!("JSON parse error in {code}: {e}"));
            let obj =
                val.as_object().unwrap_or_else(|| panic!("{code}: root is not a JSON object"));

            // _comment must be present
            if !obj.contains_key("_comment") {
                failures.push(format!("{code}: missing _comment"));
            }

            for key in REQUIRED_KEYS {
                match obj.get(*key) {
                    None => failures.push(format!("{code}: missing key [{key}]")),
                    Some(serde_json::Value::String(s)) if s.is_empty() => {
                        failures.push(format!("{code}: empty value for [{key}]"))
                    },
                    Some(serde_json::Value::String(_)) => {},
                    Some(other) => {
                        failures.push(format!("{code}: [{key}] is not a string: {other}"))
                    },
                }
            }
        }

        if !failures.is_empty() {
            panic!("Locale validation failures:\n{}", failures.join("\n"));
        }
    }

    // ── I18n::new round-trip: every locale resolves without panicking ─────

    #[test]
    fn all_locales_load_via_new() {
        for (code, _) in LOCALES {
            let i = I18n::new(code);
            // At minimum the looked-up value must not be empty
            assert!(!i.get("From").is_empty(), "{code}: get(From) returned empty");
            assert!(!i.get("Index").is_empty(), "{code}: get(Index) returned empty");
        }
    }

    // ── known_languages() covers expected codes ───────────────────────────

    #[test]
    fn known_languages_includes_expected_codes() {
        let langs: Vec<&str> = I18n::known_languages().collect();
        for expected in &[
            "en",
            "de",
            "fr",
            "ja",
            "zh",
            "ar",
            "el",
            "grc",
            "x-klingon",
            "x-quenya",
            "x-lojban",
        ] {
            assert!(langs.contains(expected), "known_languages missing: {expected}");
        }
    }
}