Browse Source

WIP project structure and encoder first impl

develop-refactor
chodak166 6 months ago
parent
commit
ff94981dde
  1. 1
      lib/Cargo.toml
  2. 5
      lib/src/core/major.rs
  3. 15
      lib/src/core/major/dict_en.rs
  4. 15
      lib/src/core/major/dict_pl.rs
  5. 113
      lib/src/core/major/encoder.rs
  6. 11
      lib/src/core/major/tests.rs
  7. 23
      lib/src/core/major_system_encoder.rs
  8. 8
      lib/src/core/mod.rs
  9. 6
      lib/src/core/system.rs
  10. 0
      lib/src/core/traits.rs

1
lib/Cargo.toml

@ -4,3 +4,4 @@ version = "0.1.0"
edition = "2024"
[dependencies]
once_cell = "1.21.3"

5
lib/src/core/major.rs

@ -0,0 +1,5 @@
pub mod dict_en;
pub mod dict_pl;
mod encoder;
pub use encoder::*;

15
lib/src/core/major/dict_en.rs

@ -0,0 +1,15 @@
use crate::core::major::{Dict, DictEntry};
/// Returns the dictionary defined in the `dict_en` module.
///
/// At the moment it holds a single rule: `"EN"` is encoded as `"2"`, with one
/// value in each of the four context lists.
pub fn get_dict() -> Dict {
    let rule = DictEntry {
        phoneme_in: String::from("EN"),
        phoneme_out: String::from("2"),
        not_after: vec![String::from("Y")],
        not_before: vec![String::from("X")],
        only_after: vec![String::from("A")],
        only_before: vec![String::from("C")],
    };
    // ...more entries...
    vec![rule]
}

15
lib/src/core/major/dict_pl.rs

@ -0,0 +1,15 @@
use crate::core::major::{Dict, DictEntry};
/// Returns the dictionary defined in the `dict_pl` module.
///
/// At the moment it holds a single rule: `"PL"` is encoded as `"2"`, with one
/// value in each of the four context lists.
pub fn get_dict() -> Dict {
    let rule = DictEntry {
        phoneme_in: String::from("PL"),
        phoneme_out: String::from("2"),
        not_after: vec![String::from("Y")],
        not_before: vec![String::from("X")],
        only_after: vec![String::from("A")],
        only_before: vec![String::from("C")],
    };
    // ...more entries...
    vec![rule]
}

113
lib/src/core/major/encoder.rs

@ -0,0 +1,113 @@
use crate::core::traits::SystemEncoder;
/// A single substitution rule of an encoding dictionary.
///
/// Every occurrence of `phoneme_in` in a word is encoded as `phoneme_out`,
/// provided the text directly around the occurrence satisfies all four
/// context lists. An empty list imposes no restriction.
#[derive(Debug, Default, Clone)]
pub struct DictEntry {
    /// Text searched for in the input word.
    pub phoneme_in: String,
    /// Value emitted for each accepted occurrence of `phoneme_in`.
    pub phoneme_out: String,
    /// The occurrence is rejected when the text right after it starts with
    /// any of these.
    pub not_before: Vec<String>,
    /// The occurrence is rejected when the text right before it ends with
    /// any of these.
    pub not_after: Vec<String>,
    /// When non-empty, the text right after the occurrence must start with
    /// at least one of these.
    pub only_before: Vec<String>,
    /// When non-empty, the text right before the occurrence must end with
    /// at least one of these.
    pub only_after: Vec<String>,
}

/// An ordered list of substitution rules.
pub type Dict = Vec<DictEntry>;

/// (index, encoded value)
type DictMatches = Vec<(usize, String)>;

/// Encoder driven by a [`Dict`] of context-sensitive substitution rules.
#[derive(Debug, Clone)]
pub struct Encoder {
    dict: Dict,
}

impl Encoder {
    /// Creates an encoder that applies the rules in `dict`.
    pub fn new(dict: Dict) -> Self {
        Encoder { dict }
    }

    /// Collects every occurrence of `entry.phoneme_in` in `word` whose
    /// surrounding context is accepted by `entry`.
    ///
    /// Returns `(byte index of the occurrence, entry.phoneme_out)` pairs in
    /// the order the occurrences appear in `word`.
    fn match_entry(&self, entry: &DictEntry, word: &str) -> DictMatches {
        word.match_indices(entry.phoneme_in.as_str())
            .filter(|&(index, _)| self.is_context_matched(entry, word, index))
            .map(|(index, _)| (index, entry.phoneme_out.clone()))
            .collect()
    }

    /// Checks whether the occurrence of `entry.phoneme_in` starting at byte
    /// `index` of `word` satisfies all context constraints of `entry`.
    ///
    /// `index` must come from matching `entry.phoneme_in` against `word`, so
    /// that both slice boundaries below fall on character boundaries.
    fn is_context_matched(&self, entry: &DictEntry, word: &str, index: usize) -> bool {
        let before = &word[..index];
        let after = &word[index + entry.phoneme_in.len()..];

        let is_preceded_by = |ctx: &String| before.ends_with(ctx.as_str());
        let is_followed_by = |ctx: &String| after.starts_with(ctx.as_str());

        // Exclusion lists: a single hit rejects the occurrence.
        let excluded = entry.not_after.iter().any(is_preceded_by)
            || entry.not_before.iter().any(is_followed_by);
        if excluded {
            return false;
        }

        // Requirement lists: an empty list means "no requirement". Without the
        // explicit emptiness check an entry lacking `only_*` values could
        // never match, because a negated `all` over nothing is vacuously true.
        let preceding_ok =
            entry.only_after.is_empty() || entry.only_after.iter().any(is_preceded_by);
        let following_ok =
            entry.only_before.is_empty() || entry.only_before.iter().any(is_followed_by);

        preceding_ok && following_ok
    }
}
impl SystemEncoder for Encoder {
    /// Encodes `word` by concatenating the outputs of all dictionary matches,
    /// ordered by the byte position at which each match starts in `word`.
    ///
    /// Returns an empty string when no dictionary entry matches.
    fn encode(&self, word: &str) -> String {
        let mut matches: DictMatches = self
            .dict
            .iter()
            .flat_map(|entry| self.match_entry(entry, word))
            .collect();

        // `sort_by_key` is stable, so matches that start at the same position
        // keep the order of their entries in the dictionary.
        matches.sort_by_key(|&(pos, _)| pos);

        matches.into_iter().map(|(_, value)| value).collect()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// One-rule dictionary: "B" becomes "2", with a value in every context list.
    fn create_basic_dict() -> Dict {
        let rule = DictEntry {
            phoneme_in: String::from("B"),
            phoneme_out: String::from("2"),
            not_after: vec![String::from("Y")],
            not_before: vec![String::from("X")],
            only_after: vec![String::from("A")],
            only_before: vec![String::from("C")],
        };
        vec![rule]
    }

    /// "B" sits between the required "A" and "C", so it must be encoded.
    #[test]
    fn test_single_symbol_encoding_all_reqirements_met() {
        let encoded = Encoder::new(create_basic_dict()).encode("ABC");
        assert_eq!(encoded, "2");
    }
}

11
lib/src/core/major/tests.rs

@ -0,0 +1,11 @@
// Placeholder test module: it currently contains no active tests.
#[cfg(test)]
mod tests {
use super::*;
// NOTE(review): the disabled test below refers to `TextProcessor`, which is
// not defined in any of the visible sources; presumably left over from a
// template. Replace it with real tests or remove it.
// #[test]
// fn test_processing() {
// let processor = TextProcessor::new(">> ");
// assert_eq!(processor.process("hello"), ">> HELLO");
// }
}

23
lib/src/core/major_system_encoder.rs

@ -1,23 +0,0 @@
use crate::core::SystemEncoder;
/// Stub encoder that only remembers the dictionary label it was created with.
pub struct MajorEncoder {
    dict: String, // TODO
}

impl MajorEncoder {
    /// Creates an encoder holding an owned copy of `dict`.
    pub fn new(dict: &str) -> Self {
        Self {
            dict: dict.to_owned(),
        }
    }
}
impl SystemEncoder for MajorEncoder {
    /// Produces `"<word>_<dict> -> <digits>"`, where `<digits>` holds one
    /// decimal digit per character of `word`: the character's own value when
    /// it is a decimal digit, `0` otherwise.
    fn encode(&self, word: &str) -> String {
        let mut digits = String::new();
        for ch in word.chars() {
            let value = ch.to_digit(10).unwrap_or(0);
            digits.push_str(&value.to_string());
        }
        format!("{}_{} -> {}", word, self.dict, digits)
    }
}

8
lib/src/core/mod.rs

@ -1,7 +1,7 @@
pub mod major_system_encoder;
pub mod major;
pub mod system;
pub mod system_encoder;
pub mod traits;
pub use self::major_system_encoder::*;
pub use self::major::*;
pub use self::system::*;
pub use self::system_encoder::*;
pub use self::traits::*;

6
lib/src/core/system.rs

@ -1,5 +1,5 @@
use crate::core::MajorEncoder;
use crate::core::SystemEncoder;
use crate::core::major;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum System {
@ -9,7 +9,7 @@ pub enum System {
/// Builds the boxed [`SystemEncoder`] implementation that corresponds to `system`.
// NOTE(review): this listing appears to be a diff hunk rendered without +/-
// markers, so each variant shows up twice; confirm against the real file.
pub fn create_encoder(system: &System) -> Box<dyn SystemEncoder> {
match system {
// Presumably the removed lines: encoders built from the old `MajorEncoder`.
System::MajorPl => Box::new(MajorEncoder::new("dict-major-pl")),
System::MajorEn => Box::new(MajorEncoder::new("dict-major-en")),
// Presumably the added lines: `major::Encoder` fed by the per-language dictionaries.
System::MajorPl => Box::new(major::Encoder::new(major::dict_pl::get_dict())),
System::MajorEn => Box::new(major::Encoder::new(major::dict_en::get_dict())),
}
}

0
lib/src/core/system_encoder.rs → lib/src/core/traits.rs

Loading…
Cancel
Save