From d6a475a0cab366bed9b7d8b1a0472a51275b6b3e Mon Sep 17 00:00:00 2001 From: chodak166 Date: Mon, 29 Dec 2025 19:47:56 +0100 Subject: [PATCH] WIP: encoder --- lib/src/core/entities.rs | 9 +++++-- lib/src/core/sys_major/decoder.rs | 5 ++-- lib/src/core/sys_major/lvmap.rs | 29 +++++++++++++++++++++ lib/src/presentation/cli/commands/encode.rs | 7 +++-- lib/src/presentation/cli/defaults.rs | 10 ++++++- 5 files changed, 52 insertions(+), 8 deletions(-) diff --git a/lib/src/core/entities.rs b/lib/src/core/entities.rs index 93b7808..720d554 100644 --- a/lib/src/core/entities.rs +++ b/lib/src/core/entities.rs @@ -1,10 +1,11 @@ use super::errors::CodecError; +use serde::Serialize; use std::num::ParseIntError; use std::ops::Deref; use std::{collections::HashMap, u64}; /// A number encoded as a sequence of words -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize)] pub struct EncodedPart { pub value: u64, pub words: Vec, @@ -14,7 +15,7 @@ pub struct EncodedPart { pub type EncodedSplit = Vec; /// A number encoded as words, split in multiple ways -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize)] pub struct EncodedValue(Vec); impl EncodedValue { @@ -60,6 +61,10 @@ impl DecodedValue { self.0.len() } + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + pub fn value_len(&self) -> Result { if self.len() == 0 { return Err(CodecError::EmptyValue); diff --git a/lib/src/core/sys_major/decoder.rs b/lib/src/core/sys_major/decoder.rs index b7ce7c6..ea8d2a3 100644 --- a/lib/src/core/sys_major/decoder.rs +++ b/lib/src/core/sys_major/decoder.rs @@ -66,8 +66,8 @@ impl Decoder { fn is_context_matched(&self, entry: &Rule, word: &str, index: usize) -> bool { let before_context = &word[..index]; let after_context = &word[index + entry.phoneme_in.len()..]; - dbg!(&before_context); - dbg!(&after_context); + // dbg!(&before_context); + // dbg!(&after_context); if entry .not_after @@ -116,7 +116,6 @@ impl SystemDecoder for Decoder { .collect(); matches.sort_by_key(|&(pos, _)| pos); - dbg!(&matches); let num_str: String = matches.into_iter().map(|(_, value)| value).collect(); DecodedValue::new(num_str) } diff --git a/lib/src/core/sys_major/lvmap.rs b/lib/src/core/sys_major/lvmap.rs index 9620ffd..f2c2c49 100644 --- a/lib/src/core/sys_major/lvmap.rs +++ b/lib/src/core/sys_major/lvmap.rs @@ -86,6 +86,9 @@ impl LenValueMap { continue; } let decoded = decoder.decode(&word)?; + if decoded.is_empty() { + continue; + } self.data .entry(decoded.value_len()?) @@ -249,6 +252,32 @@ mod tests { assert!(words.contains(&TEST_WORD_4.to_string())); } + #[test] + fn test_skip_empty_decodes() { + let words = vec![TEST_WORD_1.to_string(), TEST_WORD_2.to_string()]; + let mut decoder = MockDecoder::new(); + decoder.expect_decode().returning(|word| { + if word == TEST_WORD_1 { + DecodedValue::new("".to_string()) + } else { + DecodedValue::new(TEST_NUM_2.to_string()) + } + }); + + let mut lv_map = LenValueMap::new(); + lv_map.insert_words(words, &decoder).unwrap(); + + let data = lv_map.into_data(); + + assert_eq!(data.len(), 1); + assert!(data.contains_key(&TEST_NUM_1_LEN)); + let data = data.get(&TEST_NUM_1_LEN).unwrap(); + assert!(data.contains_key(&TEST_NUM_2)); + let words = data.get(&TEST_NUM_2).unwrap(); + assert_eq!(words.len(), 1); + assert_eq!(words[0], TEST_WORD_2); + } + #[test] fn test_decoder_error_propagates() { let mut decoder = MockDecoder::new(); diff --git a/lib/src/presentation/cli/commands/encode.rs b/lib/src/presentation/cli/commands/encode.rs index c78704f..3750542 100644 --- a/lib/src/presentation/cli/commands/encode.rs +++ b/lib/src/presentation/cli/commands/encode.rs @@ -5,6 +5,9 @@ use tracing::debug; pub async fn run(config: EncoderConfig, dict: &dyn DictRepository) { debug!("Running encoder with config {:?}", config); let encoder = system::create_encoder(&config.system, dict).await; - let result = encoder.encode(&config.input); - println!("{:?}", result); + let result = encoder.encode(&config.input).unwrap(); + + let json = serde_json::to_string_pretty(&result).expect("JSON serialization failed"); + + println!("{}", json); } diff --git a/lib/src/presentation/cli/defaults.rs b/lib/src/presentation/cli/defaults.rs index 0e6430f..e4cce72 100644 --- a/lib/src/presentation/cli/defaults.rs +++ b/lib/src/presentation/cli/defaults.rs @@ -7,6 +7,8 @@ pub const HOST: &str = "127.0.0.1"; pub const PORT: u16 = 8080; pub const LOG_LEVEL: &str = "info"; pub const SYSTEM_NAME: &str = "major_pl"; +pub const IMPORT_DICT_NAME: &str = ""; +pub const IMPORT_DICT_PATH: &str = ""; pub const HELP_PORT: &str = formatcp!("Override Port [default: {}]", PORT); pub const HELP_LOG: &str = formatcp!("Override Log Level [default: {}]", LOG_LEVEL); @@ -27,7 +29,13 @@ pub fn set_defaults( .set_default("server.port", PORT)? // Decoder .set_default("decoder.system", SYSTEM_NAME)? - .set_default("decoder.input", "") + .set_default("decoder.input", "")? + // Encoder + .set_default("encoder.system", SYSTEM_NAME)? + .set_default("encoder.input", "")? + // Import Dict + .set_default("import_dict.name", IMPORT_DICT_NAME)? + .set_default("import_dict.path", IMPORT_DICT_PATH) // Wrapping in Result .map_err(|e| e.into()) }