You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

122 lines
3.2 KiB

use crate::common::{entities::DecodedValue, errors::CodecError, traits::SystemDecoder};
/// A context-sensitive rewrite rule: occurrences of `phoneme_in` are decoded to `phoneme_out`.
///
/// The four context lists restrict which occurrences are accepted, based on the text
/// immediately before and after the occurrence (evaluated in `Decoder::is_context_matched`).
/// An empty list imposes no restriction.
#[derive(Debug, Default, Clone)]
pub struct Rule {
/// Substring searched for in the word being decoded.
pub phoneme_in: String,
/// Value emitted for every accepted occurrence of `phoneme_in`.
pub phoneme_out: String,
/// The occurrence is rejected if the text right after it starts with any of these.
pub not_before: Vec<String>,
/// The occurrence is rejected if the text right before it ends with any of these.
pub not_after: Vec<String>,
/// When non-empty, the text right after the occurrence must start with at least one of these.
pub only_before: Vec<String>,
/// When non-empty, the text right before the occurrence must end with at least one of these.
pub only_after: Vec<String>,
}
impl Rule {
pub fn into_lowercase(self) -> Self {
Rule {
phoneme_in: self.phoneme_in.to_lowercase(),
phoneme_out: self.phoneme_out.to_lowercase(),
not_before: Self::lower_vec(self.not_before),
not_after: Self::lower_vec(self.not_after),
only_before: Self::lower_vec(self.only_before),
only_after: Self::lower_vec(self.only_after),
}
}
fn lower_vec(vec: Vec<String>) -> Vec<String> {
vec.into_iter().map(|s| s.to_lowercase()).collect()
}
}
/// An ordered list of [`Rule`]s.
pub type Rules = Vec<Rule>;
// NOTE(review): commented-out draft of a named rule set; not referenced by the
// code visible here. Either finish it or remove it.
// pub struct rules {
// name: String,
// entries: Rules,
// }
/// Matches produced by applying a rule to a word, as `(index, decoded value)` pairs.
///
/// `index` is the byte offset of the occurrence as reported by `str::match_indices`;
/// the decoded value is the matching rule's `phoneme_out`.
type RuleMatches = Vec<(usize, String)>;
/// Decodes words by applying a list of [`Rule`]s.
///
/// Build one with `Decoder::new`, which lowercases the rules before storing them.
pub struct Decoder {
/// The rules to apply, already lowercased, in the order they were supplied.
rules: Rules,
}
impl Decoder {
pub fn new(rules: Rules) -> Self {
Decoder {
rules: Decoder::to_lower_rules(rules),
}
}
fn to_lower_rules(rules: Rules) -> Rules {
rules
.into_iter()
.map(|entry| entry.into_lowercase())
.collect()
}
fn match_entry(&self, entry: &Rule, word: &str) -> RuleMatches {
word.match_indices(&entry.phoneme_in)
.filter(|(index, _)| self.is_context_matched(&entry, &word, *index))
.map(|(index, _)| (index, entry.phoneme_out.clone()))
.collect()
}
fn is_context_matched(&self, entry: &Rule, word: &str, index: usize) -> bool {
let before_context = &word[..index];
let after_context = &word[index + entry.phoneme_in.len()..];
// dbg!(&before_context);
// dbg!(&after_context);
if entry
.not_after
.iter()
.any(|prefix| before_context.ends_with(prefix))
{
return false;
}
if entry
.not_before
.iter()
.any(|suffix| after_context.starts_with(suffix))
{
return false;
}
if !entry.only_after.is_empty()
&& entry
.only_after
.iter()
.all(|prefix| !before_context.ends_with(prefix))
{
return false;
}
if !entry.only_before.is_empty()
&& entry
.only_before
.iter()
.all(|suffix| !after_context.starts_with(suffix))
{
return false;
}
true
}
}
impl SystemDecoder for Decoder {
    /// Decodes `word` by applying every rule and concatenating the outputs of all
    /// accepted matches in order of their position in the word.
    ///
    /// The word is lowercased before matching, and positions refer to byte offsets
    /// in that lowercased form. The concatenated string is handed to
    /// `DecodedValue::new`, whose result is returned unchanged.
    fn decode(&self, word: &str) -> Result<DecodedValue, CodecError> {
        // Lowercase once up front instead of once per rule.
        let lowered = word.to_lowercase();

        let mut matches: RuleMatches = self
            .rules
            .iter()
            .flat_map(|entry| self.match_entry(entry, &lowered))
            .collect();

        // `sort_by_key` is stable, so matches at the same offset keep the order of
        // the rules that produced them.
        matches.sort_by_key(|&(pos, _)| pos);

        let num_str: String = matches.into_iter().map(|(_, value)| value).collect();
        DecodedValue::new(num_str)
    }
}