use crate::common::{entities::DecodedValue, errors::CodecError, traits::SystemDecoder}; #[derive(Debug, Default, Clone)] pub struct Rule { pub phoneme_in: String, pub phoneme_out: String, pub not_before: Vec, pub not_after: Vec, pub only_before: Vec, pub only_after: Vec, } impl Rule { pub fn into_lowercase(self) -> Self { Rule { phoneme_in: self.phoneme_in.to_lowercase(), phoneme_out: self.phoneme_out.to_lowercase(), not_before: Self::lower_vec(self.not_before), not_after: Self::lower_vec(self.not_after), only_before: Self::lower_vec(self.only_before), only_after: Self::lower_vec(self.only_after), } } fn lower_vec(vec: Vec) -> Vec { vec.into_iter().map(|s| s.to_lowercase()).collect() } } pub type Rules = Vec; // pub struct rules { // name: String, // entries: Rules, // } /// (index, decoded value) type RuleMatches = Vec<(usize, String)>; pub struct Decoder { rules: Rules, } impl Decoder { pub fn new(rules: Rules) -> Self { Decoder { rules: Decoder::to_lower_rules(rules), } } fn to_lower_rules(rules: Rules) -> Rules { rules .into_iter() .map(|entry| entry.into_lowercase()) .collect() } fn match_entry(&self, entry: &Rule, word: &str) -> RuleMatches { word.match_indices(&entry.phoneme_in) .filter(|(index, _)| self.is_context_matched(&entry, &word, *index)) .map(|(index, _)| (index, entry.phoneme_out.clone())) .collect() } fn is_context_matched(&self, entry: &Rule, word: &str, index: usize) -> bool { let before_context = &word[..index]; let after_context = &word[index + entry.phoneme_in.len()..]; // dbg!(&before_context); // dbg!(&after_context); if entry .not_after .iter() .any(|prefix| before_context.ends_with(prefix)) { return false; } if entry .not_before .iter() .any(|suffix| after_context.starts_with(suffix)) { return false; } if !entry.only_after.is_empty() && entry .only_after .iter() .all(|prefix| !before_context.ends_with(prefix)) { return false; } if !entry.only_before.is_empty() && entry .only_before .iter() .all(|suffix| !after_context.starts_with(suffix)) { return false; } true } } impl SystemDecoder for Decoder { fn decode(&self, word: &str) -> Result { let mut matches: RuleMatches = self .rules .iter() .flat_map(|entry| self.match_entry(&entry, &word.to_lowercase())) .collect(); matches.sort_by_key(|&(pos, _)| pos); let num_str: String = matches.into_iter().map(|(_, value)| value).collect(); DecodedValue::new(num_str) } }