You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
122 lines
3.2 KiB
122 lines
3.2 KiB
use crate::common::{entities::DecodedValue, errors::CodecError, traits::SystemDecoder}; |
|
|
|
/// A single rewrite rule: an occurrence of `phoneme_in` in a word contributes
/// `phoneme_out` to the decoded output, provided the surrounding text satisfies
/// the context lists below.
#[derive(Clone, Debug, Default)]
pub struct Rule {
    /// Substring searched for in the word being decoded.
    pub phoneme_in: String,
    /// Value emitted for every accepted occurrence of `phoneme_in`.
    pub phoneme_out: String,

    /// An occurrence is rejected when the text right after it starts with any of these.
    pub not_before: Vec<String>,
    /// An occurrence is rejected when the text right before it ends with any of these.
    pub not_after: Vec<String>,

    /// When non-empty, the text right after an occurrence must start with one of these.
    pub only_before: Vec<String>,
    /// When non-empty, the text right before an occurrence must end with one of these.
    pub only_after: Vec<String>,
}
|
|
|
impl Rule { |
|
pub fn into_lowercase(self) -> Self { |
|
Rule { |
|
phoneme_in: self.phoneme_in.to_lowercase(), |
|
phoneme_out: self.phoneme_out.to_lowercase(), |
|
not_before: Self::lower_vec(self.not_before), |
|
not_after: Self::lower_vec(self.not_after), |
|
only_before: Self::lower_vec(self.only_before), |
|
only_after: Self::lower_vec(self.only_after), |
|
} |
|
} |
|
|
|
fn lower_vec(vec: Vec<String>) -> Vec<String> { |
|
vec.into_iter().map(|s| s.to_lowercase()).collect() |
|
} |
|
} |
|
|
|
pub type Rules = Vec<Rule>; |
|
// pub struct rules { |
|
// name: String, |
|
// entries: Rules, |
|
// } |
|
|
|
/// (index, decoded value) |
|
type RuleMatches = Vec<(usize, String)>; |
|
|
|
pub struct Decoder { |
|
rules: Rules, |
|
} |
|
|
|
impl Decoder { |
|
pub fn new(rules: Rules) -> Self { |
|
Decoder { |
|
rules: Decoder::to_lower_rules(rules), |
|
} |
|
} |
|
|
|
fn to_lower_rules(rules: Rules) -> Rules { |
|
rules |
|
.into_iter() |
|
.map(|entry| entry.into_lowercase()) |
|
.collect() |
|
} |
|
|
|
fn match_entry(&self, entry: &Rule, word: &str) -> RuleMatches { |
|
word.match_indices(&entry.phoneme_in) |
|
.filter(|(index, _)| self.is_context_matched(&entry, &word, *index)) |
|
.map(|(index, _)| (index, entry.phoneme_out.clone())) |
|
.collect() |
|
} |
|
|
|
fn is_context_matched(&self, entry: &Rule, word: &str, index: usize) -> bool { |
|
let before_context = &word[..index]; |
|
let after_context = &word[index + entry.phoneme_in.len()..]; |
|
// dbg!(&before_context); |
|
// dbg!(&after_context); |
|
|
|
if entry |
|
.not_after |
|
.iter() |
|
.any(|prefix| before_context.ends_with(prefix)) |
|
{ |
|
return false; |
|
} |
|
|
|
if entry |
|
.not_before |
|
.iter() |
|
.any(|suffix| after_context.starts_with(suffix)) |
|
{ |
|
return false; |
|
} |
|
|
|
if !entry.only_after.is_empty() |
|
&& entry |
|
.only_after |
|
.iter() |
|
.all(|prefix| !before_context.ends_with(prefix)) |
|
{ |
|
return false; |
|
} |
|
|
|
if !entry.only_before.is_empty() |
|
&& entry |
|
.only_before |
|
.iter() |
|
.all(|suffix| !after_context.starts_with(suffix)) |
|
{ |
|
return false; |
|
} |
|
|
|
true |
|
} |
|
} |
|
|
|
impl SystemDecoder for Decoder { |
|
fn decode(&self, word: &str) -> Result<DecodedValue, CodecError> { |
|
let mut matches: RuleMatches = self |
|
.rules |
|
.iter() |
|
.flat_map(|entry| self.match_entry(&entry, &word.to_lowercase())) |
|
.collect(); |
|
|
|
matches.sort_by_key(|&(pos, _)| pos); |
|
let num_str: String = matches.into_iter().map(|(_, value)| value).collect(); |
|
DecodedValue::new(num_str) |
|
} |
|
}
|
|
|