You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
87 lines
2.2 KiB
87 lines
2.2 KiB
use crate::core::entities::{DictEntries, DictEntry}; |
|
use crate::core::traits::SystemEncoder; |
|
|
|
/// Matches collected for a word, one `(index, encoded value)` pair per match.
///
/// `index` is the byte offset at which the match starts in the searched word
/// (it is produced by `str::match_indices`); the `String` is the replacement
/// taken from the matching entry's `phoneme_out`.
type DictMatches = Vec<(usize, String)>;
|
|
|
pub struct Encoder { |
|
dict: DictEntries, |
|
} |
|
|
|
impl Encoder { |
|
pub fn new(dict: DictEntries) -> Self { |
|
Encoder { |
|
dict: Encoder::to_lower_dict(dict), |
|
} |
|
} |
|
|
|
fn to_lower_dict(dict: DictEntries) -> DictEntries { |
|
dict.into_iter() |
|
.map(|entry| entry.into_lowercase()) |
|
.collect() |
|
} |
|
|
|
fn match_entry(&self, entry: &DictEntry, word: &str) -> DictMatches { |
|
word.match_indices(&entry.phoneme_in) |
|
.filter(|(index, _)| self.is_context_matched(&entry, &word, *index)) |
|
.map(|(index, _)| (index, entry.phoneme_out.clone())) |
|
.collect() |
|
} |
|
|
|
fn is_context_matched(&self, entry: &DictEntry, word: &str, index: usize) -> bool { |
|
let before_context = &word[..index]; |
|
let after_context = &word[index + entry.phoneme_in.len()..]; |
|
dbg!(&before_context); |
|
dbg!(&after_context); |
|
|
|
if entry |
|
.not_after |
|
.iter() |
|
.any(|prefix| before_context.ends_with(prefix)) |
|
{ |
|
return false; |
|
} |
|
|
|
if entry |
|
.not_before |
|
.iter() |
|
.any(|suffix| after_context.starts_with(suffix)) |
|
{ |
|
return false; |
|
} |
|
|
|
if !entry.only_after.is_empty() |
|
&& entry |
|
.only_after |
|
.iter() |
|
.all(|prefix| !before_context.ends_with(prefix)) |
|
{ |
|
return false; |
|
} |
|
|
|
if !entry.only_before.is_empty() |
|
&& entry |
|
.only_before |
|
.iter() |
|
.all(|suffix| !after_context.starts_with(suffix)) |
|
{ |
|
return false; |
|
} |
|
|
|
true |
|
} |
|
} |
|
|
|
impl SystemEncoder for Encoder { |
|
fn encode(&self, word: &str) -> String { |
|
let mut matches: DictMatches = self |
|
.dict |
|
.iter() |
|
.flat_map(|entry| self.match_entry(&entry, &word.to_lowercase())) |
|
.collect(); |
|
|
|
matches.sort_by_key(|&(pos, _)| pos); |
|
dbg!(&matches); |
|
matches.into_iter().map(|(_, value)| value).collect() |
|
} |
|
}
|
|
|