Browse Source

Added major system implementation and tests

develop-refactor
chodak166 6 months ago
parent
commit
b7110d8d85
  1. 4
      Cargo.toml
  2. 4
      app/Cargo.toml
  3. 3
      lib/src/core/major.rs
  4. 58
      lib/src/core/major/encoder.rs
  5. 120
      lib/src/core/major/tests.rs
  6. 2
      lib/src/core/mod.rs

4
Cargo.toml

@ -9,3 +9,7 @@ resolver = "3"
edition = "2024"
authors = ["chodak166 <chodak166@op.pl>"]
license = "MIT"
[profile.release]
lto = true # Link Time Optimization: Analyzes entire program for optimizations
codegen-units = 1 # Compile each crate as a single codegen unit (slower build, but allows better optimization and a smaller/faster binary)

4
app/Cargo.toml

@ -6,7 +6,3 @@ edition = "2024"
[dependencies]
clap = { version = "4.5.51", features = ["derive"] }
libmnemor = { path = "../lib" }
[profile.release]
lto = true # Link Time Optimization: Analyzes entire program for optimizations
codegen-units = 1 # Compile each crate as a single codegen unit (slower build, but allows better optimization and a smaller/faster binary)

3
lib/src/core/major.rs

@ -2,4 +2,7 @@ pub mod dict_en;
pub mod dict_pl;
mod encoder;
#[cfg(test)]
mod tests;
pub use encoder::*;

58
lib/src/core/major/encoder.rs

@ -12,6 +12,23 @@ pub struct DictEntry {
pub only_after: Vec<String>,
}
impl DictEntry {
    /// Consumes the entry and returns it with every field lowercased.
    ///
    /// Both phoneme strings and each string in the four context lists
    /// (`not_before`, `not_after`, `only_before`, `only_after`) are passed
    /// through `to_lowercase`; list order and length are preserved.
    pub fn into_lowercase(self) -> Self {
        // Move every field out of `self` so nothing is cloned.
        let DictEntry {
            phoneme_in,
            phoneme_out,
            not_before,
            not_after,
            only_before,
            only_after,
        } = self;

        Self {
            phoneme_in: phoneme_in.to_lowercase(),
            phoneme_out: phoneme_out.to_lowercase(),
            not_before: Self::lower_vec(not_before),
            not_after: Self::lower_vec(not_after),
            only_before: Self::lower_vec(only_before),
            only_after: Self::lower_vec(only_after),
        }
    }

    /// Lowercases each string of `vec`, keeping the original order.
    fn lower_vec(vec: Vec<String>) -> Vec<String> {
        let mut lowered = Vec::with_capacity(vec.len());
        for item in vec {
            lowered.push(item.to_lowercase());
        }
        lowered
    }
}
pub type Dict = Vec<DictEntry>;
/// (index, encoded value)
@ -23,7 +40,15 @@ pub struct Encoder {
impl Encoder {
pub fn new(dict: Dict) -> Self {
Encoder { dict: dict }
Encoder {
dict: Encoder::to_lower_dict(dict),
}
}
fn to_lower_dict(dict: Dict) -> Dict {
dict.into_iter()
.map(|entry| entry.into_lowercase())
.collect()
}
fn match_entry(&self, entry: &DictEntry, word: &str) -> DictMatches {
@ -55,7 +80,8 @@ impl Encoder {
return false;
}
if entry
if !entry.only_after.is_empty()
&& entry
.only_after
.iter()
.all(|prefix| !before_context.ends_with(prefix))
@ -63,7 +89,8 @@ impl Encoder {
return false;
}
if entry
if !entry.only_before.is_empty()
&& entry
.only_before
.iter()
.all(|suffix| !after_context.starts_with(suffix))
@ -80,7 +107,7 @@ impl SystemEncoder for Encoder {
let mut matches: DictMatches = self
.dict
.iter()
.flat_map(|entry| self.match_entry(&entry, &word))
.flat_map(|entry| self.match_entry(&entry, &word.to_lowercase()))
.collect();
matches.sort_by_key(|&(pos, _)| pos);
@ -88,26 +115,3 @@ impl SystemEncoder for Encoder {
matches.into_iter().map(|(_, value)| value).collect()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Single-entry dictionary mapping "B" to "2" with all four context
    /// lists populated.
    fn create_basic_dict() -> Dict {
        vec![DictEntry {
            phoneme_in: "B".to_string(),
            phoneme_out: "2".to_string(),
            not_after: vec!["Y".to_string()],
            not_before: vec!["X".to_string()],
            only_after: vec!["A".to_string()],
            only_before: vec!["C".to_string()],
        }]
    }

    /// In "ABC" the phoneme "B" directly follows "A" and directly precedes
    /// "C", so the entry is expected to produce "2".
    #[test]
    fn test_single_symbol_encoding_all_requirements_met() {
        let encoder = Encoder::new(create_basic_dict());
        let output = encoder.encode("ABC");
        assert_eq!(output, "2");
    }
}

120
lib/src/core/major/tests.rs

@ -1,11 +1,121 @@
use crate::core::major::*;
use crate::core::traits::SystemEncoder;
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE(review): in the test names below "before"/"after" appear to refer
    // to the text positioned before/after the phoneme in the input word. The
    // encoder checks `only_after` against the preceding context and
    // `only_before` against the following context — confirm the naming is
    // intentional.

    /// One entry mapping "B" to "2" with every context list populated with
    /// a single one-character string.
    fn create_single_dict() -> Dict {
        vec![DictEntry {
            phoneme_in: "B".to_string(),
            phoneme_out: "2".to_string(),
            not_after: vec!["Y".to_string()],
            not_before: vec!["X".to_string()],
            only_after: vec!["A".to_string()],
            only_before: vec!["C".to_string()],
        }]
    }

    /// One entry mapping "B" to "2" with all context lists left at their
    /// `Default` value.
    fn create_single_dict_min() -> Dict {
        vec![DictEntry {
            phoneme_in: "B".to_string(),
            phoneme_out: "2".to_string(),
            ..Default::default()
        }]
    }

    /// One entry mapping the two-character phoneme "CD" to "2", with
    /// two-character strings in every context list.
    fn create_double_dict() -> Dict {
        vec![DictEntry {
            phoneme_in: "CD".to_string(),
            phoneme_out: "2".to_string(),
            not_after: vec!["YZ".to_string()],
            not_before: vec!["WX".to_string()],
            only_after: vec!["AB".to_string()],
            only_before: vec!["EF".to_string()],
        }]
    }

    /// "B" directly follows "A" and directly precedes "C": expected to match.
    #[test]
    fn test_single_symbol_encoding_only_before_only_after_matched() {
        let encoder = Encoder::new(create_single_dict());
        let output = encoder.encode("ABC");
        assert_eq!(output, "2");
    }

    /// "CD" directly follows "AB" and directly precedes "EF": expected to match.
    #[test]
    fn test_double_symbol_encoding_only_before_only_after_matched() {
        let encoder = Encoder::new(create_double_dict());
        let output = encoder.encode("ABCDEF");
        assert_eq!(output, "2");
    }

    /// "B" is preceded by "D" instead of the required "A": no output expected.
    #[test]
    fn test_single_symbol_encoding_only_before_not_matched_with_other() {
        let encoder = Encoder::new(create_single_dict());
        let output = encoder.encode("DBC");
        assert_eq!(output, "");
    }

    /// "CD" is surrounded by "AA"/"EE" instead of the required "AB"/"EF":
    /// no output expected.
    #[test]
    fn test_double_symbol_encoding_only_before_not_matched_with_other() {
        let encoder = Encoder::new(create_double_dict());
        let output = encoder.encode("AACDEE");
        assert_eq!(output, "");
    }

    /// The dictionary is written in upper case and the input in lower case;
    /// the match is still expected to succeed.
    #[test]
    fn test_case_insensitivity() {
        let encoder = Encoder::new(create_double_dict());
        let output = encoder.encode("abcdef");
        assert_eq!(output, "2");
    }

    /// Nothing precedes "B", so the required preceding "A" is missing:
    /// no output expected.
    #[test]
    fn test_single_symbol_encoding_only_before_not_matched_with_empty() {
        let encoder = Encoder::new(create_single_dict());
        let output = encoder.encode("BC");
        assert_eq!(output, "");
    }

    /// "B" is preceded by "X": no output expected.
    ///
    /// NOTE(review): with this fixture the missing preceding "A" would
    /// presumably reject the input on its own, so this case may not isolate
    /// the `not_*` rule — confirm.
    #[test]
    fn test_single_symbol_encoding_only_before_not_matched_with_not_before() {
        let encoder = Encoder::new(create_single_dict());
        let output = encoder.encode("XBC");
        assert_eq!(output, "");
    }

    /// "B" is followed by "D" instead of the required "C": no output expected.
    #[test]
    fn test_single_symbol_encoding_only_after_not_matched_with_other() {
        let encoder = Encoder::new(create_single_dict());
        let output = encoder.encode("ABD");
        assert_eq!(output, "");
    }

    /// Nothing follows "B", so the required following "C" is missing:
    /// no output expected.
    #[test]
    fn test_single_symbol_encoding_only_after_not_matched_with_empty() {
        let encoder = Encoder::new(create_single_dict());
        let output = encoder.encode("AB");
        assert_eq!(output, "");
    }

    /// "B" is followed by "Y": no output expected.
    ///
    /// NOTE(review): with this fixture the missing following "C" would
    /// presumably reject the input on its own, so this case may not isolate
    /// the `not_*` rule — confirm.
    #[test]
    fn test_single_symbol_encoding_only_after_not_matched_with_not_after() {
        let encoder = Encoder::new(create_single_dict());
        let output = encoder.encode("ABY");
        assert_eq!(output, "");
    }

    /// With no context constraints, a lone "B" is expected to match.
    #[test]
    fn test_single_symbol_encoding_empty_before_after_matched_with_empty() {
        let encoder = Encoder::new(create_single_dict_min());
        let output = encoder.encode("B");
        assert_eq!(output, "2");
    }

    /// With no context constraints, "B" is expected to match regardless of
    /// the surrounding characters.
    #[test]
    fn test_single_symbol_encoding_empty_before_after_matched_with_others() {
        let encoder = Encoder::new(create_single_dict_min());
        let output = encoder.encode("AXBYC");
        assert_eq!(output, "2");
    }
}

2
lib/src/core/mod.rs

@ -2,6 +2,6 @@ pub mod major;
pub mod system;
pub mod traits;
pub use self::major::*;
// pub use self::major::*;
pub use self::system::*;
pub use self::traits::*;

Loading…
Cancel
Save