use crate::core::entities::{DictEntries, DictEntry}; use crate::core::traits::SystemEncoder; /// (index, encoded value) type DictMatches = Vec<(usize, String)>; pub struct Encoder { dict: DictEntries, } impl Encoder { pub fn new(dict: DictEntries) -> Self { Encoder { dict: Encoder::to_lower_dict(dict), } } fn to_lower_dict(dict: DictEntries) -> DictEntries { dict.into_iter() .map(|entry| entry.into_lowercase()) .collect() } fn match_entry(&self, entry: &DictEntry, word: &str) -> DictMatches { word.match_indices(&entry.phoneme_in) .filter(|(index, _)| self.is_context_matched(&entry, &word, *index)) .map(|(index, _)| (index, entry.phoneme_out.clone())) .collect() } fn is_context_matched(&self, entry: &DictEntry, word: &str, index: usize) -> bool { let before_context = &word[..index]; let after_context = &word[index + entry.phoneme_in.len()..]; dbg!(&before_context); dbg!(&after_context); if entry .not_after .iter() .any(|prefix| before_context.ends_with(prefix)) { return false; } if entry .not_before .iter() .any(|suffix| after_context.starts_with(suffix)) { return false; } if !entry.only_after.is_empty() && entry .only_after .iter() .all(|prefix| !before_context.ends_with(prefix)) { return false; } if !entry.only_before.is_empty() && entry .only_before .iter() .all(|suffix| !after_context.starts_with(suffix)) { return false; } true } } impl SystemEncoder for Encoder { fn encode(&self, word: &str) -> String { let mut matches: DictMatches = self .dict .iter() .flat_map(|entry| self.match_entry(&entry, &word.to_lowercase())) .collect(); matches.sort_by_key(|&(pos, _)| pos); dbg!(&matches); matches.into_iter().map(|(_, value)| value).collect() } }