diff --git a/Cargo.toml b/Cargo.toml index f6397ec..8ffd4b0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,4 +8,8 @@ resolver = "3" [workspace.package] edition = "2024" authors = ["chodak166 "] -license = "MIT" \ No newline at end of file +license = "MIT" + +[profile.release] +lto = true # Link Time Optimization: Analyzes entire program for optimizations +codegen-units = 1 # Forces single-threaded compilation (slower build, but smaller/faster binary) diff --git a/app/Cargo.toml b/app/Cargo.toml index f6a49b8..d8187de 100644 --- a/app/Cargo.toml +++ b/app/Cargo.toml @@ -6,7 +6,3 @@ edition = "2024" [dependencies] clap = { version = "4.5.51", features = ["derive"] } libmnemor = { path = "../lib" } - -[profile.release] -lto = true # Link Time Optimization: Analyzes entire program for optimizations -codegen-units = 1 # Forces single-threaded compilation (slower build, but smaller/faster binary) diff --git a/lib/src/core/major.rs b/lib/src/core/major.rs index 9b3e0a6..621ac9d 100644 --- a/lib/src/core/major.rs +++ b/lib/src/core/major.rs @@ -2,4 +2,7 @@ pub mod dict_en; pub mod dict_pl; mod encoder; +#[cfg(test)] +mod tests; + pub use encoder::*; diff --git a/lib/src/core/major/encoder.rs b/lib/src/core/major/encoder.rs index ed279ae..3134a65 100644 --- a/lib/src/core/major/encoder.rs +++ b/lib/src/core/major/encoder.rs @@ -12,6 +12,23 @@ pub struct DictEntry { pub only_after: Vec, } +impl DictEntry { + pub fn into_lowercase(self) -> Self { + DictEntry { + phoneme_in: self.phoneme_in.to_lowercase(), + phoneme_out: self.phoneme_out.to_lowercase(), + not_before: Self::lower_vec(self.not_before), + not_after: Self::lower_vec(self.not_after), + only_before: Self::lower_vec(self.only_before), + only_after: Self::lower_vec(self.only_after), + } + } + + fn lower_vec(vec: Vec) -> Vec { + vec.into_iter().map(|s| s.to_lowercase()).collect() + } +} + pub type Dict = Vec; /// (index, encoded value) @@ -23,7 +40,15 @@ pub struct Encoder { impl Encoder { pub fn new(dict: Dict) -> Self { - Encoder { dict: dict } + Encoder { + dict: Encoder::to_lower_dict(dict), + } + } + + fn to_lower_dict(dict: Dict) -> Dict { + dict.into_iter() + .map(|entry| entry.into_lowercase()) + .collect() } fn match_entry(&self, entry: &DictEntry, word: &str) -> DictMatches { @@ -55,18 +80,20 @@ impl Encoder { return false; } - if entry - .only_after - .iter() - .all(|prefix| !before_context.ends_with(prefix)) + if !entry.only_after.is_empty() + && entry + .only_after + .iter() + .all(|prefix| !before_context.ends_with(prefix)) { return false; } - if entry - .only_before - .iter() - .all(|suffix| !after_context.starts_with(suffix)) + if !entry.only_before.is_empty() + && entry + .only_before + .iter() + .all(|suffix| !after_context.starts_with(suffix)) { return false; } @@ -80,7 +107,7 @@ impl SystemEncoder for Encoder { let mut matches: DictMatches = self .dict .iter() - .flat_map(|entry| self.match_entry(&entry, &word)) + .flat_map(|entry| self.match_entry(&entry, &word.to_lowercase())) .collect(); matches.sort_by_key(|&(pos, _)| pos); @@ -88,26 +115,3 @@ impl SystemEncoder for Encoder { matches.into_iter().map(|(_, value)| value).collect() } } - -#[cfg(test)] -mod tests { - use super::*; - - fn create_basic_dict() -> Dict { - vec![DictEntry { - phoneme_in: "B".to_string(), - phoneme_out: "2".to_string(), - not_after: vec!["Y".to_string()], - not_before: vec!["X".to_string()], - only_after: vec!["A".to_string()], - only_before: vec!["C".to_string()], - }] - } - - #[test] - fn test_single_symbol_encoding_all_reqirements_met() { - let encoder = Encoder::new(create_basic_dict()); - let output = encoder.encode("ABC"); - assert_eq!(output, "2") - } -} diff --git a/lib/src/core/major/tests.rs b/lib/src/core/major/tests.rs index 12c3af6..8a36075 100644 --- a/lib/src/core/major/tests.rs +++ b/lib/src/core/major/tests.rs @@ -1,11 +1,121 @@ +use crate::core::major::*; +use crate::core::traits::SystemEncoder; #[cfg(test)] mod tests { use super::*; - // #[test] - // fn test_processing() { - // let processor = TextProcessor::new(">> "); - // assert_eq!(processor.process("hello"), ">> HELLO"); - // } + fn create_single_dict() -> Dict { + vec![DictEntry { + phoneme_in: "B".to_string(), + phoneme_out: "2".to_string(), + not_after: vec!["Y".to_string()], + not_before: vec!["X".to_string()], + only_after: vec!["A".to_string()], + only_before: vec!["C".to_string()], + }] + } + + fn create_single_dict_min() -> Dict { + vec![DictEntry { + phoneme_in: "B".to_string(), + phoneme_out: "2".to_string(), + ..Default::default() + }] + } + + fn create_double_dict() -> Dict { + vec![DictEntry { + phoneme_in: "CD".to_string(), + phoneme_out: "2".to_string(), + not_after: vec!["YZ".to_string()], + not_before: vec!["WX".to_string()], + only_after: vec!["AB".to_string()], + only_before: vec!["EF".to_string()], + }] + } + + #[test] + fn test_single_symbol_encoding_only_before_only_after_matched() { + let encoder = Encoder::new(create_single_dict()); + let output = encoder.encode("ABC"); + assert_eq!(output, "2") + } + + #[test] + fn test_double_symbol_encoding_only_before_only_after_matched() { + let encoder = Encoder::new(create_double_dict()); + let output = encoder.encode("ABCDEF"); + assert_eq!(output, "2") + } + + #[test] + fn test_single_symbol_encoding_only_before_not_matched_with_other() { + let encoder = Encoder::new(create_single_dict()); + let output = encoder.encode("DBC"); + assert_eq!(output, "") + } + + #[test] + fn test_double_symbol_encoding_only_before_not_matched_with_other() { + let encoder = Encoder::new(create_double_dict()); + let output = encoder.encode("AACDEE"); + assert_eq!(output, "") + } + + #[test] + fn test_case_insensitivity() { + let encoder = Encoder::new(create_double_dict()); + let output = encoder.encode("abcdef"); + assert_eq!(output, "2") + } + + #[test] + fn test_single_symbol_encoding_only_before_not_matched_with_empty() { + let encoder = Encoder::new(create_single_dict()); + let output = encoder.encode("BC"); + assert_eq!(output, "") + } + + #[test] + fn test_single_symbol_encoding_only_before_not_matched_with_not_before() { + let encoder = Encoder::new(create_single_dict()); + let output = encoder.encode("XBC"); + assert_eq!(output, "") + } + + #[test] + fn test_single_symbol_encoding_only_after_not_matched_with_other() { + let encoder = Encoder::new(create_single_dict()); + let output = encoder.encode("ABD"); + assert_eq!(output, "") + } + + #[test] + fn test_single_symbol_encoding_only_after_not_matched_with_empty() { + let encoder = Encoder::new(create_single_dict()); + let output = encoder.encode("AB"); + assert_eq!(output, "") + } + + #[test] + fn test_single_symbol_encoding_only_after_not_matched_with_not_after() { + let encoder = Encoder::new(create_single_dict()); + let output = encoder.encode("ABY"); + assert_eq!(output, "") + } + + #[test] + fn test_single_symbol_encoding_empty_before_after_matched_with_empty() { + let encoder = Encoder::new(create_single_dict_min()); + let output = encoder.encode("B"); + assert_eq!(output, "2") + } + + #[test] + fn test_single_symbol_encoding_empty_before_after_matched_with_others() { + let encoder = Encoder::new(create_single_dict_min()); + let output = encoder.encode("AXBYC"); + assert_eq!(output, "2") + } } diff --git a/lib/src/core/mod.rs b/lib/src/core/mod.rs index 2cf7ef2..5c75643 100644 --- a/lib/src/core/mod.rs +++ b/lib/src/core/mod.rs @@ -2,6 +2,6 @@ pub mod major; pub mod system; pub mod traits; -pub use self::major::*; +// pub use self::major::*; pub use self::system::*; pub use self::traits::*;