You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

87 lines
2.2 KiB

use crate::core::entities::{DictEntries, DictEntry};
use crate::core::traits::SystemEncoder;
/// Matches collected for a single word.
///
/// Each item is `(index, encoded value)`: `index` is the byte offset at which a
/// dictionary entry's `phoneme_in` was found in the word, and the encoded value
/// is a copy of that entry's `phoneme_out`.
type DictMatches = Vec<(usize, String)>;
/// Encoder driven by a set of dictionary entries.
pub struct Encoder {
// Entries consulted when encoding; `Encoder::new` stores them after passing
// each one through `into_lowercase`.
dict: DictEntries,
}
impl Encoder {
    /// Creates an encoder from `dict`.
    ///
    /// Every entry is converted with `into_lowercase` before being stored.
    pub fn new(dict: DictEntries) -> Self {
        Encoder {
            dict: Encoder::to_lower_dict(dict),
        }
    }

    /// Consumes `dict` and rebuilds it with `into_lowercase` applied to each entry.
    fn to_lower_dict(dict: DictEntries) -> DictEntries {
        dict.into_iter()
            .map(|entry| entry.into_lowercase())
            .collect()
    }

    /// Returns one `(index, phoneme_out)` pair for every occurrence of
    /// `entry.phoneme_in` in `word` whose surrounding text satisfies the
    /// entry's context rules (see `is_context_matched`).
    ///
    /// `index` is the byte offset of the occurrence as reported by
    /// `str::match_indices`, which yields non-overlapping matches.
    fn match_entry(&self, entry: &DictEntry, word: &str) -> DictMatches {
        word.match_indices(&entry.phoneme_in)
            .filter(|(index, _)| self.is_context_matched(entry, word, *index))
            .map(|(index, _)| (index, entry.phoneme_out.clone()))
            .collect()
    }

    /// Checks the context rules of `entry` for an occurrence of
    /// `entry.phoneme_in` starting at byte offset `index` of `word`.
    ///
    /// The text before the occurrence is tested against `not_after` /
    /// `only_after` (by suffix), and the text after it against `not_before` /
    /// `only_before` (by prefix). The occurrence is accepted when:
    ///
    /// * no `not_after` item ends the preceding text,
    /// * no `not_before` item starts the following text,
    /// * `only_after` is empty, or at least one of its items ends the preceding text,
    /// * `only_before` is empty, or at least one of its items starts the following text.
    ///
    /// `index` must be an offset produced by matching `entry.phoneme_in`
    /// against `word`; the slicing below panics on out-of-range or
    /// non-char-boundary offsets.
    fn is_context_matched(&self, entry: &DictEntry, word: &str, index: usize) -> bool {
        let before_context = &word[..index];
        let after_context = &word[index + entry.phoneme_in.len()..];

        // Exclusion rules: a single hit rejects the occurrence.
        if entry
            .not_after
            .iter()
            .any(|prefix| before_context.ends_with(prefix))
        {
            return false;
        }
        if entry
            .not_before
            .iter()
            .any(|suffix| after_context.starts_with(suffix))
        {
            return false;
        }

        // Inclusion rules: only enforced when the list is non-empty, and then
        // at least one item has to match.
        if !entry.only_after.is_empty()
            && !entry
                .only_after
                .iter()
                .any(|prefix| before_context.ends_with(prefix))
        {
            return false;
        }
        if !entry.only_before.is_empty()
            && !entry
                .only_before
                .iter()
                .any(|suffix| after_context.starts_with(suffix))
        {
            return false;
        }

        true
    }
}
impl SystemEncoder for Encoder {
    /// Encodes `word` by concatenating the `phoneme_out` of every dictionary
    /// match, ordered by the position of the match in the lowercased word.
    ///
    /// Returns an empty string when no entry matches.
    fn encode(&self, word: &str) -> String {
        // Lowercase once up front instead of once per dictionary entry.
        let lowered = word.to_lowercase();

        let mut matches: DictMatches = self
            .dict
            .iter()
            .flat_map(|entry| self.match_entry(entry, &lowered))
            .collect();

        // `sort_by_key` is stable, so matches that start at the same position
        // keep the order in which their entries appear in the dictionary.
        matches.sort_by_key(|&(pos, _)| pos);

        matches.into_iter().map(|(_, value)| value).collect()
    }
}