diff --git a/lib/src/sys_major/removeme.old/application.rs b/lib/src/sys_major/removeme.old/application.rs deleted file mode 100644 index 37d536f..0000000 --- a/lib/src/sys_major/removeme.old/application.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub mod config; -pub mod errors; -pub mod services; -pub mod traits; diff --git a/lib/src/sys_major/removeme.old/application/config.rs b/lib/src/sys_major/removeme.old/application/config.rs deleted file mode 100644 index de84147..0000000 --- a/lib/src/sys_major/removeme.old/application/config.rs +++ /dev/null @@ -1,25 +0,0 @@ -use crate::core::system::System; -use serde::Deserialize; - -#[derive(Debug, Deserialize, Clone)] -pub struct ServerConfig { - pub port: u16, -} - -#[derive(Debug, Deserialize, Clone)] -pub struct DecoderConfig { - pub system: System, - pub input: String, -} - -#[derive(Debug, Deserialize, Clone)] -pub struct EncoderConfig { - pub system: System, - pub input: String, -} - -#[derive(Debug, Deserialize, Clone)] -pub struct ImportDictConfig { - pub name: String, - pub path: String, -} diff --git a/lib/src/sys_major/removeme.old/application/errors.rs b/lib/src/sys_major/removeme.old/application/errors.rs deleted file mode 100644 index 8b13789..0000000 --- a/lib/src/sys_major/removeme.old/application/errors.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/lib/src/sys_major/removeme.old/application/services.rs b/lib/src/sys_major/removeme.old/application/services.rs deleted file mode 100644 index 89fd02f..0000000 --- a/lib/src/sys_major/removeme.old/application/services.rs +++ /dev/null @@ -1,59 +0,0 @@ -use std::sync::Arc; - -use crate::core::traits::{DictRepository, DictSource}; - -pub struct DictImporter { - repo: Arc, - batch_size: usize, -} - -impl DictImporter { - pub fn new(repo: Arc) -> Self { - Self { - repo, - batch_size: 1000, // reasonable default - } - } - - pub fn with_batch_size(mut self, batch_size: usize) -> Self { - self.batch_size = batch_size; - self - } - - pub async fn import(&self, mut source: impl DictSource) -> Result<(), anyhow::Error> { - // 1. Ensure Dict exists (Logic: Create if new, or maybe clear existing?) - self.repo.create_dict().await?; - - let mut batch = Vec::with_capacity(self.batch_size); - - // 2. Stream data - while let Some(result) = source.next_entry() { - match result { - Ok(entry) => { - // Optional: Domain Validation logic could go here - // if entry.text.is_empty() { continue; } - - batch.push(entry); - - // 3. Batch Write - if batch.len() >= self.batch_size { - self.repo.save_entries(&batch).await?; - batch.clear(); - } - } - Err(e) => { - // Logic: Do we abort on malformed JSON or log and continue? - // Here we abort for safety. - return Err(e); - } - } - } - - // 4. Flush remaining - if !batch.is_empty() { - self.repo.save_entries(&batch).await?; - } - - Ok(()) - } -} diff --git a/lib/src/sys_major/removeme.old/application/traits.rs b/lib/src/sys_major/removeme.old/application/traits.rs deleted file mode 100644 index 8b13789..0000000 --- a/lib/src/sys_major/removeme.old/application/traits.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/lib/src/sys_major/removeme.old/core.rs b/lib/src/sys_major/removeme.old/core.rs deleted file mode 100644 index 4877415..0000000 --- a/lib/src/sys_major/removeme.old/core.rs +++ /dev/null @@ -1,11 +0,0 @@ -pub mod entities; -pub mod errors; -pub mod sys_major; -pub mod system; -pub mod traits; - -// pub use self::major::*; -// pub use self::entities::*; -// pub use self::errors::*; -// pub use self::system::*; -pub use self::traits::*; diff --git a/lib/src/sys_major/removeme.old/core/entities.rs b/lib/src/sys_major/removeme.old/core/entities.rs deleted file mode 100644 index 720d554..0000000 --- a/lib/src/sys_major/removeme.old/core/entities.rs +++ /dev/null @@ -1,146 +0,0 @@ -use super::errors::CodecError; -use serde::Serialize; -use std::num::ParseIntError; -use std::ops::Deref; -use std::{collections::HashMap, u64}; - -/// A number encoded as a sequence of words -#[derive(Debug, Clone, Serialize)] -pub struct EncodedPart { - pub value: u64, - pub words: Vec, -} - -/// A way (variant) to split input number -pub type EncodedSplit = Vec; - -/// A number encoded as words, split in multiple ways -#[derive(Debug, Clone, Serialize)] -pub struct EncodedValue(Vec); - -impl EncodedValue { - pub fn new(data: Vec) -> Self { - EncodedValue(data) - } -} - -impl Deref for EncodedValue { - type Target = Vec; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -/// The number value can be encoded as many word sets, -/// but decoded as one number. For partial values -/// and dictionary words (reasonable length), we can use -/// u64 (20-digit number), but the whole input text can -/// be longer than 20 digits, so we operate on String (<= 255). -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct DecodedValue(String); - -impl DecodedValue { - pub fn new(value: String) -> Result { - if value.len() > u8::MAX as usize { - Err(CodecError::TextTooLong(value.len())) - } else { - Ok(Self(value)) - } - } - - pub fn as_str(&self) -> &str { - &self.0 - } - - pub fn parse(&self) -> Result { - self.0.parse() - } - - pub fn len(&self) -> usize { - self.0.len() - } - - pub fn is_empty(&self) -> bool { - self.0.is_empty() - } - - pub fn value_len(&self) -> Result { - if self.len() == 0 { - return Err(CodecError::EmptyValue); - } - DecodedLength::try_from(self.len()) - } -} - -impl PartialEq<&str> for DecodedValue { - fn eq(&self, other: &&str) -> bool { - &self.0 == *other - } -} - -impl PartialEq for &str { - fn eq(&self, other: &DecodedValue) -> bool { - *self == &other.0 - } -} - -#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] -pub struct DecodedLength(u8); - -impl DecodedLength { - pub const fn from(value: u8) -> Self { - Self(value) - } -} - -impl TryFrom for DecodedLength { - type Error = CodecError; - fn try_from(value: usize) -> Result { - if value > u8::MAX as usize { - Err(CodecError::ValueLimitExceeded(value)) - } else { - Ok(Self(value as u8)) - } - } -} - -// --- Dictionary --- - -pub type DictEntryId = u64; - -#[derive(Debug, Clone, PartialEq)] -pub struct DictEntry { - pub id: Option, - pub text: String, - pub metadata: HashMap, -} - -impl DictEntry { - pub fn new(id: Option, text: String) -> Self { - DictEntry { - id, - text, - metadata: HashMap::new(), - } - } -} - -#[derive(Debug, Clone)] -pub struct Dict { - pub name: String, - pub entries: HashMap, -} - -impl Dict { - pub fn new(name: String) -> Self { - Dict { - name, - entries: HashMap::new(), - } - } - - pub fn add_entry(&mut self, entry: DictEntry) { - self.entries.insert(entry.id.unwrap(), entry); - } -} diff --git a/lib/src/sys_major/removeme.old/core/errors.rs b/lib/src/sys_major/removeme.old/core/errors.rs deleted file mode 100644 index 7f05a37..0000000 --- a/lib/src/sys_major/removeme.old/core/errors.rs +++ /dev/null @@ -1,31 +0,0 @@ -use thiserror::Error; - -#[derive(Error, Debug)] -pub enum RepositoryError { - #[error("Data source connection failed")] - ConnectionFailed, - - #[error("'{0}' not found")] - NotFound(String), - - #[error("Storage error: {0}")] - StorageError(String), -} - -#[derive(Debug, Error)] -pub enum CodecError { - #[error("text too long: {0} bytes")] - TextTooLong(usize), - - #[error("value too large: {0}/255")] - ValueLimitExceeded(usize), - - #[error("operation not allowed on empty value")] - EmptyValue, - - #[error("initialization failed")] - InitializationFailed, - - #[error("unexpected error: {0}")] - UnexpectedError(String), -} diff --git a/lib/src/sys_major/removeme.old/core/sys_major.rs b/lib/src/sys_major/removeme.old/core/sys_major.rs deleted file mode 100644 index 7678c9e..0000000 --- a/lib/src/sys_major/removeme.old/core/sys_major.rs +++ /dev/null @@ -1,12 +0,0 @@ -pub mod decoder; -pub mod encoder; -mod lvmap; -pub mod rules_en; -pub mod rules_pl; - -#[cfg(test)] -mod decoder_tests; - -pub use decoder::*; -pub use encoder::*; -pub use lvmap::LenValueMap; diff --git a/lib/src/sys_major/removeme.old/core/sys_major/decoder.rs b/lib/src/sys_major/removeme.old/core/sys_major/decoder.rs deleted file mode 100644 index ea8d2a3..0000000 --- a/lib/src/sys_major/removeme.old/core/sys_major/decoder.rs +++ /dev/null @@ -1,122 +0,0 @@ -use crate::core::{entities::DecodedValue, errors::CodecError, traits::SystemDecoder}; - -#[derive(Debug, Default, Clone)] -pub struct Rule { - pub phoneme_in: String, - pub phoneme_out: String, - - pub not_before: Vec, - pub not_after: Vec, - - pub only_before: Vec, - pub only_after: Vec, -} - -impl Rule { - pub fn into_lowercase(self) -> Self { - Rule { - phoneme_in: self.phoneme_in.to_lowercase(), - phoneme_out: self.phoneme_out.to_lowercase(), - not_before: Self::lower_vec(self.not_before), - not_after: Self::lower_vec(self.not_after), - only_before: Self::lower_vec(self.only_before), - only_after: Self::lower_vec(self.only_after), - } - } - - fn lower_vec(vec: Vec) -> Vec { - vec.into_iter().map(|s| s.to_lowercase()).collect() - } -} - -pub type Rules = Vec; -// pub struct rules { -// name: String, -// entries: Rules, -// } - -/// (index, decoded value) -type RuleMatches = Vec<(usize, String)>; - -pub struct Decoder { - rules: Rules, -} - -impl Decoder { - pub fn new(rules: Rules) -> Self { - Decoder { - rules: Decoder::to_lower_rules(rules), - } - } - - fn to_lower_rules(rules: Rules) -> Rules { - rules - .into_iter() - .map(|entry| entry.into_lowercase()) - .collect() - } - - fn match_entry(&self, entry: &Rule, word: &str) -> RuleMatches { - word.match_indices(&entry.phoneme_in) - .filter(|(index, _)| self.is_context_matched(&entry, &word, *index)) - .map(|(index, _)| (index, entry.phoneme_out.clone())) - .collect() - } - - fn is_context_matched(&self, entry: &Rule, word: &str, index: usize) -> bool { - let before_context = &word[..index]; - let after_context = &word[index + entry.phoneme_in.len()..]; - // dbg!(&before_context); - // dbg!(&after_context); - - if entry - .not_after - .iter() - .any(|prefix| before_context.ends_with(prefix)) - { - return false; - } - - if entry - .not_before - .iter() - .any(|suffix| after_context.starts_with(suffix)) - { - return false; - } - - if !entry.only_after.is_empty() - && entry - .only_after - .iter() - .all(|prefix| !before_context.ends_with(prefix)) - { - return false; - } - - if !entry.only_before.is_empty() - && entry - .only_before - .iter() - .all(|suffix| !after_context.starts_with(suffix)) - { - return false; - } - - true - } -} - -impl SystemDecoder for Decoder { - fn decode(&self, word: &str) -> Result { - let mut matches: RuleMatches = self - .rules - .iter() - .flat_map(|entry| self.match_entry(&entry, &word.to_lowercase())) - .collect(); - - matches.sort_by_key(|&(pos, _)| pos); - let num_str: String = matches.into_iter().map(|(_, value)| value).collect(); - DecodedValue::new(num_str) - } -} diff --git a/lib/src/sys_major/removeme.old/core/sys_major/decoder_tests.rs b/lib/src/sys_major/removeme.old/core/sys_major/decoder_tests.rs deleted file mode 100644 index 2c79050..0000000 --- a/lib/src/sys_major/removeme.old/core/sys_major/decoder_tests.rs +++ /dev/null @@ -1,134 +0,0 @@ -use super::decoder::{Decoder, Rule, Rules}; -use crate::core::traits::SystemDecoder; - -#[cfg(test)] -mod tests { - use super::*; - - fn create_single_rules() -> Rules { - vec![Rule { - phoneme_in: "B".to_string(), - phoneme_out: "2".to_string(), - not_after: vec!["Y".to_string()], - not_before: vec!["X".to_string()], - only_after: vec!["A".to_string()], - only_before: vec!["C".to_string()], - }] - } - - fn create_single_rules_min() -> Rules { - vec![Rule { - phoneme_in: "B".to_string(), - phoneme_out: "2".to_string(), - ..Default::default() - }] - } - - fn create_double_rules() -> Rules { - vec![ - Rule { - phoneme_in: "CD".to_string(), - phoneme_out: "2".to_string(), - not_after: vec!["00".to_string(), "YZ".to_string()], - not_before: vec!["11".to_string(), "WX".to_string()], - only_after: vec!["22".to_string(), "AB".to_string()], - only_before: vec!["33".to_string(), "EF".to_string()], - }, - Rule { - phoneme_in: "MN".to_string(), - phoneme_out: "3".to_string(), - ..Default::default() - }, - ] - } - - #[test] - fn test_single_symbol_encoding_only_before_only_after_matched() { - let decoder = Decoder::new(create_single_rules()); - let output = decoder.decode("ABC").unwrap(); - assert_eq!(output, "2") - } - - #[test] - fn test_double_symbol_encoding_only_before_only_after_matched() { - let decoder = Decoder::new(create_double_rules()); - let output = decoder.decode("ABCDEF").unwrap(); - assert_eq!(output, "2") - } - - #[test] - fn test_single_symbol_encoding_only_before_not_matched_with_other() { - let decoder = Decoder::new(create_single_rules()); - let output = decoder.decode("DBC").unwrap(); - assert_eq!(output, "") - } - - #[test] - fn test_double_symbol_encoding_only_before_not_matched_with_other() { - let decoder = Decoder::new(create_double_rules()); - let output = decoder.decode("AACDEE").unwrap(); - assert_eq!(output, "") - } - - #[test] - fn test_case_insensitivity() { - let decoder = Decoder::new(create_double_rules()); - let output = decoder.decode("abcdef").unwrap(); - assert_eq!(output, "2") - } - - #[test] - fn test_single_symbol_encoding_only_before_not_matched_with_empty() { - let decoder = Decoder::new(create_single_rules()); - let output = decoder.decode("BC").unwrap(); - assert_eq!(output, "") - } - - #[test] - fn test_single_symbol_encoding_only_before_not_matched_with_not_before() { - let decoder = Decoder::new(create_single_rules()); - let output = decoder.decode("XBC").unwrap(); - assert_eq!(output, "") - } - - #[test] - fn test_single_symbol_encoding_only_after_not_matched_with_other() { - let decoder = Decoder::new(create_single_rules()); - let output = decoder.decode("ABD").unwrap(); - assert_eq!(output, "") - } - - #[test] - fn test_single_symbol_encoding_only_after_not_matched_with_empty() { - let decoder = Decoder::new(create_single_rules()); - let output = decoder.decode("AB").unwrap(); - assert_eq!(output, "") - } - - #[test] - fn test_single_symbol_encoding_only_after_not_matched_with_not_after() { - let decoder = Decoder::new(create_single_rules()); - let output = decoder.decode("ABY").unwrap(); - assert_eq!(output, "") - } - - #[test] - fn test_single_symbol_encoding_empty_before_after_matched_with_empty() { - let decoder = Decoder::new(create_single_rules_min()); - let output = decoder.decode("B").unwrap(); - assert_eq!(output, "2") - } - - #[test] - fn test_single_symbol_encoding_empty_before_after_matched_with_others() { - let decoder = Decoder::new(create_single_rules_min()); - let output = decoder.decode("AXBYC").unwrap(); - assert_eq!(output, "2") - } - #[test] - fn test_encoding_multiple_phonemes() { - let decoder = Decoder::new(create_double_rules()); - let output = decoder.decode("VvmNabCd33mn00CD22cdefmn").unwrap(); - assert_eq!(output, "32323") - } -} diff --git a/lib/src/sys_major/removeme.old/core/sys_major/encoder.rs b/lib/src/sys_major/removeme.old/core/sys_major/encoder.rs deleted file mode 100644 index 7bfc84b..0000000 --- a/lib/src/sys_major/removeme.old/core/sys_major/encoder.rs +++ /dev/null @@ -1,178 +0,0 @@ -use crate::core::{ - entities::{EncodedPart, EncodedSplit, EncodedValue}, - errors::CodecError, - sys_major::LenValueMap, - traits::*, -}; - -#[derive(Debug)] -pub struct Encoder { - lv_map: LenValueMap, -} - -impl Encoder { - pub fn new(lv_map: LenValueMap) -> Self { - Encoder { lv_map } - } -} - -impl SystemEncoder for Encoder { - fn initialize(&self) -> Result<(), CodecError> { - Ok(()) - } - - fn encode(&self, input: &str) -> Result { - let size = input.chars().count(); - let max_mask: usize = (1 << (size - 1)) - 1; - - let indices: Vec = input.char_indices().map(|(i, _)| i).collect(); - let mut results = Vec::with_capacity(max_mask); - - for mask in 0..=max_mask { - let mut parts: Vec = Vec::new(); - let mut last_split = input.char_indices().count(); // we go from right to left to start with the longest parts - - // Iterate through the mask bits to find where to split - for i in 0..size - 1 { - // Check if the i-th bit is set - if (mask >> i) & 1 == 1 { - // The split corresponds to the byte index of the (i+1)-th character - let split_idx = indices[indices.len() - i - 1]; - parts.push(input[split_idx..last_split].to_string()); - last_split = split_idx; - } - } - // Push the remaining part of the string - parts.push(input[..last_split].to_string()); - - let mut all_matched = true; - let mut split = EncodedSplit::new(); - parts.reverse(); - - for part in &parts { - let Ok(num_part) = part.parse::() else { - all_matched = false; - break; - }; - let Some(words) = self.lv_map.get(part.len() as u8, num_part) else { - all_matched = false; - break; - }; - split.push(EncodedPart { - value: num_part, - words: words.clone(), - }); - } - - if all_matched { - results.push(Partition { - value: split, - // To find the "most equal" size, we minimize the sum of squared lengths. - // (This mathematically minimizes variance without needing floating point math) - sum_sq_len: parts.iter().map(|p| p.chars().count().pow(2)).sum(), - }); - } - - // Calculate metrics for sorting - // let num_parts = parts.len(); - - // // To find the "most equal" size, we minimize the sum of squared lengths. - // // (This mathematically minimizes variance without needing floating point math) - // let sum_sq_len: usize = parts.iter().map(|p| p.chars().count().pow(2)).sum(); - - // if let Some(words) = self.lv_map.get(size as u8, input.parse().unwrap()) { - // results.push(Partition { - // parts: words.clone(), - // sum_sq_len, - // }); - // } - } - - // Ok(EncodedValue::new(words)) - // Sort by: - // 1. Fewer parts first (1 part, then 2 parts...) - // 2. Most equal lengths (lower sum of squared lengths is more balanced) - // 3. TODO: Lexicographically (for deterministic stability)? - results.sort_by(|a, b| { - a.value - .len() - .cmp(&b.value.len()) - .then(a.sum_sq_len.cmp(&b.sum_sq_len)) - }); - - // Extract just the strings - let split_results = results.into_iter().map(|p| p.value).collect(); - Ok(EncodedValue::new(split_results)) - } -} - -// A helper struct to keep the split variant and its sort metrics together -struct Partition { - value: EncodedSplit, - sum_sq_len: usize, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_encode_as_single_length_result() { - let mut lvmap = LenValueMap::new(); - lvmap.push(3, 123, "test_123"); - lvmap.push(3, 345, "test_345_1"); - lvmap.push(3, 345, "test_345_2"); - lvmap.push(3, 678, "test_678"); - let encoder = Encoder::new(lvmap); - let result = encoder.encode("345").unwrap(); - - assert_eq!(result.len(), 1); // single split - assert_eq!(result[0].len(), 1); // single part - assert_eq!(result[0][0].value, 345); - assert_eq!(result[0][0].words.len(), 2); // two words - assert_eq!(result[0][0].words[0], "test_345_1"); - assert_eq!(result[0][0].words[1], "test_345_2"); - } - - #[test] - fn test_encode_as_all_lengths() { - let mut lvmap = LenValueMap::new(); - lvmap.push(1, 0, "test_0"); - lvmap.push(1, 9, "test_9"); - lvmap.push(1, 8, "test_8"); - lvmap.push(1, 7, "test_7"); - lvmap.push(2, 98, "test_98"); - lvmap.push(2, 87, "test_87"); - lvmap.push(3, 987, "test_987"); - lvmap.push(3, 876, "test_876"); - - let encoder = Encoder::new(lvmap); - let result = encoder.encode("987").unwrap(); - - assert_eq!(result.len(), 4); // 987, 98|7, 9|87, 9|8|7 - assert_eq!(result[0].len(), 1); // 987 - - assert_eq!(result[0][0].words.len(), 1); - assert_eq!(result[0][0].words[0], "test_987"); - - assert_eq!(result[1].len(), 2); // 98|7 - assert_eq!(result[1][0].words.len(), 1); - assert_eq!(result[1][0].words[0], "test_98"); - assert_eq!(result[1][1].words.len(), 1); - assert_eq!(result[1][1].words[0], "test_7"); - - assert_eq!(result[2].len(), 2); // 9|87 - assert_eq!(result[2][0].words.len(), 1); - assert_eq!(result[2][0].words[0], "test_9"); - assert_eq!(result[2][1].words.len(), 1); - assert_eq!(result[2][1].words[0], "test_87"); - - assert_eq!(result[3].len(), 3); // 9|8|7 - assert_eq!(result[3][0].words.len(), 1); - assert_eq!(result[3][0].words[0], "test_9"); - assert_eq!(result[3][1].words.len(), 1); - assert_eq!(result[3][1].words[0], "test_8"); - assert_eq!(result[3][2].words.len(), 1); - assert_eq!(result[3][2].words[0], "test_7"); - } -} diff --git a/lib/src/sys_major/removeme.old/core/sys_major/lvmap.rs b/lib/src/sys_major/removeme.old/core/sys_major/lvmap.rs deleted file mode 100644 index 0f5d641..0000000 --- a/lib/src/sys_major/removeme.old/core/sys_major/lvmap.rs +++ /dev/null @@ -1,351 +0,0 @@ -use crate::core::{ - SystemDecoder, - entities::DecodedLength, - errors::{CodecError, RepositoryError}, -}; -use futures::{Stream, StreamExt}; -use std::{collections::HashMap, num::ParseIntError}; -use thiserror::Error; - -// We store words by encoded number length, then encoded value -// Example: -// root: -// - 3: -// - 750: -// - word: klasa -// - word: gilza -// - 849: -// - word: farba -// - 2: -// - 45: -// - word: oral - -#[derive(Error, Debug)] -pub enum LenValueMapError { - #[error("value parsing error: {0}")] - Parse(#[from] ParseIntError), - - #[error(transparent)] - Codec(#[from] CodecError), - - #[error(transparent)] - Repository(#[from] RepositoryError), - - #[error("unable to build encoder data: {0}")] - Build(String), -} - -type DecodedNumber = u64; -pub type LenValueData = HashMap>>; - -#[derive(Debug, Default, Clone)] -pub struct LenValueMap { - data: LenValueData, -} - -impl LenValueMap { - pub fn new() -> Self { - Self::default() - } - - pub fn is_empty(&self) -> bool { - self.data.is_empty() - } - - pub fn into_data(self) -> LenValueData { - self.data - } - - pub fn push(&mut self, len: u8, num: DecodedNumber, word: impl Into) -> &mut Self { - self.data - .entry(DecodedLength::from(len)) - .or_insert_with(HashMap::new) - .entry(num) - .or_insert_with(Vec::new) - .push(word.into()); - self - } - - pub fn get(&self, len: u8, num: DecodedNumber) -> Option<&Vec> { - self.data.get(&DecodedLength::from(len))?.get(&num) - } - - pub fn insert_words( - &mut self, - words: I, - decoder: &dyn SystemDecoder, - ) -> Result<(), LenValueMapError> - where - I: IntoIterator, - { - for word in words { - if word.is_empty() { - continue; - } - let decoded = decoder.decode(&word)?; - if decoded.is_empty() { - continue; - } - - self.data - .entry(decoded.value_len()?) - .or_default() - .entry(decoded.parse()?) - .or_default() - .push(word); - } - Ok(()) - } - - pub fn from_data(data: LenValueData) -> Self { - Self { data: data } - } - - pub async fn from_stream( - stream: S, - decoder: &dyn SystemDecoder, - ) -> Result - where - // S is a stream of "Result, Error>" - S: Stream, crate::core::errors::RepositoryError>>, - { - let mut map = LenValueMap::new(); - let mut stream = Box::pin(stream); - - // We stream the batches one by one. - // This ensures only one batch is in memory at a time. - while let Some(batch_result) = stream.next().await { - match batch_result { - Ok(batch) => { - // We delegate to the synchronous logic for the heavy lifting - map.insert_words(batch, decoder)?; - } - Err(e) => { - // Convert RepositoryError to LenValueMapError::Build - return Err(e.into()); - } - } - } - - Ok(map) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::core::{entities::*, errors::*}; - use futures::stream; - - use std::collections::HashMap; - - use mockall::{mock, predicate::*}; - - const TEST_WORD_1: &str = "test_word_1"; - const TEST_WORD_2: &str = "test_word_2"; - const TEST_WORD_3: &str = "test_word_3"; - const TEST_WORD_4: &str = "test_word_4"; - const TEST_NUM_1: u64 = 12; - const TEST_NUM_2: u64 = 34; - const TEST_NUM_3: u64 = 9876; - const TEST_NUM_1_LEN: DecodedLength = DecodedLength::from(2); - const TEST_NUM_3_LEN: DecodedLength = DecodedLength::from(4); - - fn decoded_value(n: u64) -> DecodedValue { - DecodedValue::new(n.to_string()).unwrap() - } - - fn get_test_dec_map() -> HashMap { - HashMap::from([ - (TEST_WORD_1.to_string(), decoded_value(TEST_NUM_1)), - (TEST_WORD_2.to_string(), decoded_value(TEST_NUM_2)), - (TEST_WORD_3.to_string(), decoded_value(TEST_NUM_3)), - (TEST_WORD_4.to_string(), decoded_value(TEST_NUM_3)), - ]) - } - - fn mock_decoding(word: &str) -> Result { - get_test_dec_map() - .remove(word) - .ok_or_else(|| CodecError::UnexpectedError("".to_string())) - } - - fn get_test_words() -> Vec { - vec![ - TEST_WORD_1.to_string(), - TEST_WORD_2.to_string(), - TEST_WORD_3.to_string(), - TEST_WORD_4.to_string(), - ] - } - - mock! { - pub Decoder {} - impl SystemDecoder for Decoder { - fn decode(&self, word: &str) -> Result; - } - } - - #[test] - fn test_single_word() { - let words = vec![TEST_WORD_1.to_string()]; - let mut decoder = MockDecoder::new(); - decoder - .expect_decode() - .returning(|word| mock_decoding(word)); - - let mut lv_map = LenValueMap::new(); - lv_map.insert_words(words, &decoder).unwrap(); - - let data = lv_map.into_data(); - - assert_eq!(data.len(), 1); - assert!(data.contains_key(&TEST_NUM_1_LEN)); - let data = data.get(&TEST_NUM_1_LEN).unwrap(); - assert!(data.contains_key(&TEST_NUM_1)); - let words = data.get(&TEST_NUM_1).unwrap(); - assert_eq!(words.len(), 1); - assert_eq!(words[0], TEST_WORD_1); - } - - #[test] - fn test_multiple_words() { - let words = get_test_words(); - - let mut decoder = MockDecoder::new(); - decoder - .expect_decode() - .returning(|word| mock_decoding(word)); - - let mut lv_map = LenValueMap::new(); - lv_map.insert_words(words, &decoder).unwrap(); - - let data = lv_map.into_data(); - - assert_eq!(data.len(), 2); // two different lengths - assert!(data.contains_key(&TEST_NUM_1_LEN)); - assert!(data.contains_key(&TEST_NUM_3_LEN)); - let l2 = data.get(&TEST_NUM_1_LEN).unwrap(); - let l4 = data.get(&TEST_NUM_3_LEN).unwrap(); - - assert_eq!(l2.len(), 2); // two numbers - assert_eq!(l4.len(), 1); // one number - assert!(l2.contains_key(&TEST_NUM_1)); - assert!(l2.contains_key(&TEST_NUM_2)); - assert!(l4.contains_key(&TEST_NUM_3)); - - let words = l2.get(&TEST_NUM_1).unwrap(); - assert_eq!(words.len(), 1); - assert_eq!(words[0], TEST_WORD_1); - - let words = l2.get(&TEST_NUM_2).unwrap(); - assert_eq!(words.len(), 1); - assert_eq!(words[0], TEST_WORD_2); - - let words = l4.get(&TEST_NUM_3).unwrap(); - assert_eq!(words.len(), 2); - assert!(words.contains(&TEST_WORD_3.to_string())); - assert!(words.contains(&TEST_WORD_4.to_string())); - } - - #[test] - fn test_skip_empty_decodes() { - let words = vec![TEST_WORD_1.to_string(), TEST_WORD_2.to_string()]; - let mut decoder = MockDecoder::new(); - decoder.expect_decode().returning(|word| { - if word == TEST_WORD_1 { - DecodedValue::new("".to_string()) - } else { - DecodedValue::new(TEST_NUM_2.to_string()) - } - }); - - let mut lv_map = LenValueMap::new(); - lv_map.insert_words(words, &decoder).unwrap(); - - let data = lv_map.into_data(); - - assert_eq!(data.len(), 1); - assert!(data.contains_key(&TEST_NUM_1_LEN)); - let data = data.get(&TEST_NUM_1_LEN).unwrap(); - assert!(data.contains_key(&TEST_NUM_2)); - let words = data.get(&TEST_NUM_2).unwrap(); - assert_eq!(words.len(), 1); - assert_eq!(words[0], TEST_WORD_2); - } - - #[test] - fn test_decoder_error_propagates() { - let mut decoder = MockDecoder::new(); - decoder - .expect_decode() - .returning(|_| Err(CodecError::UnexpectedError("boom".into()))); - - let mut map = LenValueMap::new(); - let result = map.insert_words(vec!["x".into()], &decoder); - - assert!(result.is_err()); - } - - // --- build --- - - #[tokio::test] - async fn test_from_stream_success() { - // 1. Setup Mocks (Same as before) - let mut decoder = MockDecoder::new(); - decoder - .expect_decode() - .returning(|word| mock_decoding(word)); - - // 2. Prepare Data - // We wrap the inner Vecs in Ok() because the stream expects Result, RepositoryError> - let batches = vec![ - Ok(vec![TEST_WORD_1.into(), TEST_WORD_2.into()]), - Ok(vec![TEST_WORD_3.into(), TEST_WORD_4.into()]), - ]; - - // 3. Create a Stream from the Vec - // stream::iter converts an IntoIterator into a Stream - let stream = stream::iter(batches); - - // 4. Inject the stream (Dependency Injection) - let map = LenValueMap::from_stream(stream, &decoder) - .await - .expect("Should build map successfully"); - - // 5. Assertions - let data = map.into_data(); - assert_eq!(data.len(), 2); - assert!(data.contains_key(&TEST_NUM_1_LEN)); - assert!(data.contains_key(&TEST_NUM_3_LEN)); - } - - #[tokio::test] - async fn test_from_stream_failure() { - let mut decoder = MockDecoder::new(); - decoder - .expect_decode() - .returning(|word| mock_decoding(word)); - - let batches = vec![ - Ok(vec![TEST_WORD_1.into()]), - Err(RepositoryError::ConnectionFailed), - Ok(vec![TEST_WORD_3.into()]), - ]; - - let stream = stream::iter(batches); - let result = LenValueMap::from_stream(stream, &decoder).await; - - match result { - // We match specifically on the Repository variant and the ConnectionFailed inner error - Err(LenValueMapError::Repository(RepositoryError::ConnectionFailed)) => { - // Success! The correct error type propagated up. - } - // If it's any other error (including a stringified one), we fail - _ => panic!( - "Expected LenValueMapError::Repository(ConnectionFailed), got {:?}", - result - ), - } - } -} diff --git a/lib/src/sys_major/removeme.old/core/sys_major/rules_en.rs b/lib/src/sys_major/removeme.old/core/sys_major/rules_en.rs deleted file mode 100644 index 78f6cc6..0000000 --- a/lib/src/sys_major/removeme.old/core/sys_major/rules_en.rs +++ /dev/null @@ -1,15 +0,0 @@ -use super::decoder::{Rule, Rules}; - -pub fn get_rules() -> Rules { - vec![ - Rule { - phoneme_in: "EN".to_string(), - phoneme_out: "2".to_string(), - not_after: vec!["Y".to_string()], - not_before: vec!["X".to_string()], - only_after: vec!["A".to_string()], - only_before: vec!["C".to_string()], - }, - // ...more entries... - ] -} diff --git a/lib/src/sys_major/removeme.old/core/sys_major/rules_pl.rs b/lib/src/sys_major/removeme.old/core/sys_major/rules_pl.rs deleted file mode 100644 index 0cdf587..0000000 --- a/lib/src/sys_major/removeme.old/core/sys_major/rules_pl.rs +++ /dev/null @@ -1,222 +0,0 @@ -use super::decoder::{Rule, Rules}; - -pub fn get_rules() -> Rules { - vec![ - Rule { - not_after: vec![], - only_after: vec![], - phoneme_in: "S".to_string(), - phoneme_out: "0".to_string(), - not_before: vec!["I".to_string(), "Z".to_string()], - only_before: vec![], - }, - Rule { - not_after: vec![ - "C".to_string(), - "D".to_string(), - "R".to_string(), - "S".to_string(), - ], - only_after: vec![], - phoneme_in: "Z".to_string(), - phoneme_out: "0".to_string(), - not_before: vec!["I".to_string()], - only_before: vec![], - }, - Rule { - not_after: vec![], - only_after: vec![], - phoneme_in: "T".to_string(), - phoneme_out: "1".to_string(), - not_before: vec![], - only_before: vec![], - }, - Rule { - only_after: vec![], - not_after: vec![], - phoneme_in: "D".to_string(), - phoneme_out: "1".to_string(), - not_before: vec!["Z".to_string(), "Ź".to_string(), "Ż".to_string()], - only_before: vec![], - }, - Rule { - not_after: vec![], - only_after: vec![], - phoneme_in: "N".to_string(), - phoneme_out: "2".to_string(), - not_before: vec!["I".to_string()], - only_before: vec![], - }, - Rule { - not_after: vec![], - only_after: vec![], - phoneme_in: "M".to_string(), - phoneme_out: "3".to_string(), - not_before: vec![], - only_before: vec![], - }, - Rule { - not_after: vec![], - only_after: vec![], - phoneme_in: "R".to_string(), - phoneme_out: "4".to_string(), - not_before: vec!["Z".to_string()], - only_before: vec![], - }, - Rule { - not_after: vec![], - only_after: vec![], - phoneme_in: "L".to_string(), - phoneme_out: "5".to_string(), - not_before: vec![], - only_before: vec![], - }, - Rule { - not_after: vec![], - only_after: vec![], - phoneme_in: "J".to_string(), - phoneme_out: "6".to_string(), - not_before: vec![], - only_before: vec![], - }, - Rule { - not_after: vec![], - only_after: vec![], - phoneme_in: "K".to_string(), - phoneme_out: "7".to_string(), - not_before: vec![], - only_before: vec![], - }, - Rule { - not_after: vec![], - only_after: vec![], - phoneme_in: "G".to_string(), - phoneme_out: "7".to_string(), - not_before: vec![], - only_before: vec![], - }, - Rule { - not_after: vec![], - only_after: vec![], - phoneme_in: "F".to_string(), - phoneme_out: "8".to_string(), - not_before: vec![], - only_before: vec![], - }, - Rule { - not_after: vec![], - only_after: vec![], - phoneme_in: "W".to_string(), - phoneme_out: "8".to_string(), - not_before: vec![], - only_before: vec![], - }, - Rule { - not_after: vec![], - only_after: vec![], - phoneme_in: "P".to_string(), - phoneme_out: "9".to_string(), - not_before: vec![], - only_before: vec![], - }, - Rule { - not_after: vec![], - only_after: vec![], - phoneme_in: "B".to_string(), - phoneme_out: "9".to_string(), - not_before: vec![], - only_before: vec![], - }, - ] -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::core::sys_major::Decoder; - use crate::traits::SystemDecoder; - - #[test] - fn test_major_dict_pl_decode_0_1() { - let decoder = Decoder::new(get_rules()); - let output = decoder.decode("SZSCZ").unwrap(); - assert_eq!(output, "0") - } - - #[test] - fn test_major_dict_pl_decode_0_2() { - let decoder = Decoder::new(get_rules()); - let output = decoder.decode("SZSICZ").unwrap(); - assert_eq!(output, "") - } - - #[test] - fn test_major_dict_pl_decode_0_3() { - let decoder = Decoder::new(get_rules()); - let output = decoder.decode("SZCZRZZCZDZSZ").unwrap(); - assert_eq!(output, "0") - } - - #[test] - fn test_major_dict_pl_decode_0_4() { - let decoder = Decoder::new(get_rules()); - let output = decoder.decode("SZCZRZZICZDZSZ").unwrap(); - assert_eq!(output, "") - } - - #[test] - fn test_major_dict_pl_decode_1_1() { - let decoder = Decoder::new(get_rules()); - let output = decoder.decode("SZTCZ").unwrap(); - assert_eq!(output, "1") - } - - #[test] - fn test_major_dict_pl_decode_1_2() { - let decoder = Decoder::new(get_rules()); - let output = decoder.decode("DZDŻDŹDDZDŻDŹ").unwrap(); - assert_eq!(output, "1") - } - - #[test] - fn test_major_dict_pl_decode_1_3() { - let decoder = Decoder::new(get_rules()); - let output = decoder.decode("DZDŻDŹDZDZDŻDŹ").unwrap(); - assert_eq!(output, "") - } - - #[test] - fn test_major_dict_pl_decode_2_1() { - let decoder = Decoder::new(get_rules()); - let output = decoder.decode("NINNI").unwrap(); - assert_eq!(output, "2") - } - - #[test] - fn test_major_dict_pl_decode_2_2() { - let decoder = Decoder::new(get_rules()); - let output = decoder.decode("NININI").unwrap(); - assert_eq!(output, "") - } - - #[test] - fn test_major_dict_pl_decode_4_1() { - let decoder = Decoder::new(get_rules()); - let output = decoder.decode("RZRRZ").unwrap(); - assert_eq!(output, "4") - } - - #[test] - fn test_major_dict_pl_decode_4_2() { - let decoder = Decoder::new(get_rules()); - let output = decoder.decode("RZRZRZ").unwrap(); - assert_eq!(output, "") - } - - #[test] - fn test_major_dict_pl_decode_full_1() { - let decoder = Decoder::new(get_rules()); - let output = decoder.decode("ATADANAMARALAJAKAGAFAWAPABA").unwrap(); - assert_eq!(output, "1123456778899") - } -} diff --git a/lib/src/sys_major/removeme.old/core/system.rs b/lib/src/sys_major/removeme.old/core/system.rs deleted file mode 100644 index 3954ba1..0000000 --- a/lib/src/sys_major/removeme.old/core/system.rs +++ /dev/null @@ -1,44 +0,0 @@ -use serde::Deserialize; - -use crate::core::sys_major::{self as major, LenValueMap}; -use crate::core::{DictRepository, SystemDecoder, SystemEncoder}; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)] -pub enum System { - #[serde(rename = "major_en")] - MajorEn, - #[serde(rename = "major_pl")] - MajorPl, -} - -// from: -impl From<&str> for System { - fn from(s: &str) -> Self { - match s { - "major_en" => System::MajorEn, - "major_pl" => System::MajorPl, - _ => panic!("Unknown system: {}", s), - } - } -} - -pub fn create_decoder(system: &System) -> Box { - match system { - System::MajorPl => Box::new(major::Decoder::new(major::rules_pl::get_rules())), - System::MajorEn => Box::new(major::Decoder::new(major::rules_en::get_rules())), - } -} - -pub async fn create_encoder(system: &System, dict: &dyn DictRepository) -> Box { - // let decoder = create_decoder(&system); - let decoder = major::Decoder::new(match system { - System::MajorPl => major::rules_pl::get_rules(), - System::MajorEn => major::rules_en::get_rules(), - }); - let stream = dict.stream_batches(100).await.unwrap(); // TODO - let lvmap = LenValueMap::from_stream(stream, &decoder).await.unwrap(); // TODO - match system { - System::MajorPl => Box::new(major::Encoder::new(lvmap)), - System::MajorEn => Box::new(major::Encoder::new(lvmap)), - } -} diff --git a/lib/src/sys_major/removeme.old/core/traits.rs b/lib/src/sys_major/removeme.old/core/traits.rs deleted file mode 100644 index e0093f8..0000000 --- a/lib/src/sys_major/removeme.old/core/traits.rs +++ /dev/null @@ -1,42 +0,0 @@ -use futures::stream::BoxStream; - -use crate::core::entities::EncodedValue; -use crate::core::errors::CodecError; - -use super::entities::{DecodedValue, Dict, DictEntry}; -use super::errors::RepositoryError; - -pub trait SystemDecoder: Send + Sync { - fn decode(&self, word: &str) -> Result; -} - -pub trait SystemEncoder: Send + Sync { - fn initialize(&self) -> Result<(), CodecError>; - fn encode(&self, word: &str) -> Result; -} - -#[async_trait::async_trait] -pub trait DictRepository: Send + Sync { - fn use_dict(&mut self, name: &str); - async fn create_dict(&self) -> Result<(), RepositoryError>; - - /// "Upsert" logic: - /// - If entry exists (by text), update metadata. - /// - If not, insert new. - /// - IDs are handled by the Database. - async fn save_entries(&self, entries: &[DictEntry]) -> Result<(), RepositoryError>; - - /// Fetch a page of entries. - async fn fetch_many(&self, limit: usize, offset: usize) -> Result; - - /// Returns a cold stream that fetches strings in chunks. - /// The stream yields `Result, RepositoryError>`. - async fn stream_batches( - &self, - batch_size: usize, - ) -> Result, RepositoryError>>, RepositoryError>; -} - -pub trait DictSource { - fn next_entry(&mut self) -> Option>; -} diff --git a/lib/src/sys_major/removeme.old/infrastructure.rs b/lib/src/sys_major/removeme.old/infrastructure.rs deleted file mode 100644 index 48040c8..0000000 --- a/lib/src/sys_major/removeme.old/infrastructure.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod errors; -pub mod json_file_dict_source; -pub mod sqlite_dict_repository; diff --git a/lib/src/sys_major/removeme.old/infrastructure/errors.rs b/lib/src/sys_major/removeme.old/infrastructure/errors.rs deleted file mode 100644 index 46b8187..0000000 --- a/lib/src/sys_major/removeme.old/infrastructure/errors.rs +++ /dev/null @@ -1,13 +0,0 @@ -// use thiserror::Error; - -// #[derive(Error, Debug)] -// pub enum RepositoryError { -// #[error("Database connection failed")] -// ConnectionFailed(#[source] sqlx::Error), -// #[error("Database query failed: {0}")] -// QueryFailed(#[source] sqlx::Error), -// #[error("Dictionary '{0}' not found")] -// NotFound(String), -// #[error("Invalid data encountered")] -// InvalidData, -// } diff --git a/lib/src/sys_major/removeme.old/infrastructure/json_file_dict_source.rs b/lib/src/sys_major/removeme.old/infrastructure/json_file_dict_source.rs deleted file mode 100644 index 760fb94..0000000 --- a/lib/src/sys_major/removeme.old/infrastructure/json_file_dict_source.rs +++ /dev/null @@ -1,85 +0,0 @@ -use crate::core::entities::DictEntry; -use crate::core::traits::DictSource; -use serde::Deserialize; -use std::collections::HashMap; -use std::fs::File; -use std::io::BufReader; -use std::path::Path; - -// The "Wire Format". -// It exists ONLY here to map external JSON names to internal Entity names. -#[derive(Deserialize)] -struct JsonEntry { - word: String, - metadata: Option>, -} - -pub struct JsonFileDictSource { - entries: Vec, - current_index: usize, - next_id: u32, -} - -impl JsonFileDictSource { - pub fn new>(path: P) -> anyhow::Result { - let file = File::open(path)?; - let reader = BufReader::new(file); - - // Parse as JSON array - let json_entries: Vec = serde_json::from_reader(reader)?; - - // Convert to DictEntry with auto-generated IDs - let mut entries = Vec::new(); - for (index, json_entry) in json_entries.into_iter().enumerate() { - let id = (index + 1) as u64; // Auto-generate ID starting from 1 - - // Convert metadata from serde_json::Value to HashMap - let metadata = if let Some(meta) = json_entry.metadata { - meta.into_iter() - .map(|(k, v)| { - ( - k, - match v { - serde_json::Value::String(s) => s, - _ => v.to_string(), - }, - ) - }) - .collect() - } else { - HashMap::new() - }; - - entries.push(DictEntry { - id: Some(id), - text: json_entry.word, - metadata, - }); - } - - let entries_len = entries.len(); - Ok(Self { - entries, - current_index: 0, - next_id: (entries_len + 1) as u32, - }) - } - - pub fn new_with_existing_ids>(path: P, start_id: u32) -> anyhow::Result { - let mut source = Self::new(path)?; - source.next_id = start_id; - Ok(source) - } -} - -impl DictSource for JsonFileDictSource { - fn next_entry(&mut self) -> Option> { - if self.current_index < self.entries.len() { - let entry = self.entries[self.current_index].clone(); - self.current_index += 1; - Some(Ok(entry)) - } else { - None - } - } -} diff --git a/lib/src/sys_major/removeme.old/infrastructure/sqlite_dict_repository.rs b/lib/src/sys_major/removeme.old/infrastructure/sqlite_dict_repository.rs deleted file mode 100644 index 6915716..0000000 --- a/lib/src/sys_major/removeme.old/infrastructure/sqlite_dict_repository.rs +++ /dev/null @@ -1,231 +0,0 @@ -use crate::core::entities::{Dict, DictEntry}; -use crate::core::errors::RepositoryError; -use crate::core::traits::DictRepository; -use futures::TryStreamExt; -use futures::stream::BoxStream; - -use sqlx::{Row, SqlitePool, sqlite::SqliteConnectOptions}; -use std::collections::HashMap; -use std::str::FromStr; - -#[derive(sqlx::FromRow)] -struct SqliteEntryDto { - id: i64, - text: String, - // sqlx reads the DB column into this specific wrapper - metadata: sqlx::types::Json>, -} - -// Mapper: DTO -> Domain Entity -impl From for DictEntry { - fn from(dto: SqliteEntryDto) -> Self { - Self { - id: Some(dto.id as u64), - text: dto.text, - // Unwrap the sqlx wrapper to get the inner HashMap - metadata: dto.metadata.0, - } - } -} - -// --- REPOSITORY IMPLEMENTATION --- - -#[derive(Clone)] -pub struct SqliteDictRepository { - pool: SqlitePool, - dict_name: String, -} - -impl SqliteDictRepository { - pub async fn new(database_url: &str) -> Result { - let options = SqliteConnectOptions::from_str(database_url) - .map_err(|_| RepositoryError::ConnectionFailed)? - .create_if_missing(true); - - let pool = SqlitePool::connect_with(options) - .await - .map_err(|_| RepositoryError::ConnectionFailed)?; - - // Ensure tables exist with proper Normalization and Constraints - sqlx::query( - r#" - CREATE TABLE IF NOT EXISTS dictionaries ( - id INTEGER PRIMARY KEY, - name TEXT NOT NULL UNIQUE, - created_at DATETIME DEFAULT CURRENT_TIMESTAMP - ); - - CREATE TABLE IF NOT EXISTS entries ( - id INTEGER PRIMARY KEY, - dictionary_id INTEGER NOT NULL, - text TEXT NOT NULL, - metadata TEXT, - updated_at DATETIME DEFAULT CURRENT_TIMESTAMP, - FOREIGN KEY(dictionary_id) REFERENCES dictionaries(id) ON DELETE CASCADE, - -- This constraint allows us to update existing words instead of duplicating them - UNIQUE(dictionary_id, text) - ); - "#, - ) - .execute(&pool) - .await - .map_err(|e| RepositoryError::StorageError(e.to_string()))?; - - Ok(Self { - pool: pool, - dict_name: "default_dict".into(), - }) - } - - // Helper: Resolve dictionary name to ID - async fn get_dict_id(&self) -> Result { - let row = sqlx::query("SELECT id FROM dictionaries WHERE name = ?") - .bind(&self.dict_name) - .fetch_optional(&self.pool) - .await - .map_err(|e| RepositoryError::StorageError(e.to_string()))?; - - match row { - Some(r) => Ok(r.get("id")), - None => Err(RepositoryError::NotFound(self.dict_name.clone())), - } - } -} - -#[async_trait::async_trait] -impl DictRepository for SqliteDictRepository { - async fn create_dict(&self) -> Result<(), RepositoryError> { - sqlx::query("INSERT OR IGNORE INTO dictionaries (name) VALUES (?)") - .bind(&self.dict_name) - .execute(&self.pool) - .await - .map_err(|e| RepositoryError::StorageError(e.to_string()))?; - Ok(()) - } - - fn use_dict(&mut self, name: &str) { - self.dict_name = name.to_string(); - } - - async fn save_entries(&self, entries: &[DictEntry]) -> Result<(), RepositoryError> { - let mut tx = self - .pool - .begin() - .await - .map_err(|_| RepositoryError::ConnectionFailed)?; - - // 1. Get Dict ID - let dict_id_row = sqlx::query("SELECT id FROM dictionaries WHERE name = ?") - .bind(&self.dict_name) - .fetch_optional(&mut *tx) - .await - .map_err(|e| RepositoryError::StorageError(e.to_string()))?; - - let dict_id: i64 = match dict_id_row { - Some(row) => row.get("id"), - None => return Err(RepositoryError::NotFound(self.dict_name.clone())), - }; - - // 2. Batch Upsert - for entry in entries { - // We must wrap the HashMap in sqlx::types::Json so SQLx knows how to serialize it - let meta_json = sqlx::types::Json(&entry.metadata); - - sqlx::query( - r#" - INSERT INTO entries (dictionary_id, text, metadata) - VALUES (?, ?, ?) - ON CONFLICT(dictionary_id, text) DO UPDATE SET - metadata = excluded.metadata, - updated_at = CURRENT_TIMESTAMP - "#, - ) - .bind(dict_id) - .bind(&entry.text) - .bind(meta_json) - .execute(&mut *tx) - .await - .map_err(|e| RepositoryError::StorageError(e.to_string()))?; - } - - tx.commit() - .await - .map_err(|e| RepositoryError::StorageError(e.to_string()))?; - Ok(()) - } - - async fn fetch_many(&self, limit: usize, offset: usize) -> Result { - // Get Dict ID - let dict_id = self.get_dict_id().await?; - - // Query (Reading into the DTO) - let dtos = sqlx::query_as::<_, SqliteEntryDto>( - r#" - SELECT id, text, metadata - FROM entries - WHERE dictionary_id = ? - LIMIT ? OFFSET ? - "#, - ) - .bind(dict_id) - .bind(limit as u32) - .bind(offset as u32) - .fetch_all(&self.pool) - .await - .map_err(|e| RepositoryError::StorageError(e.to_string()))?; - - // 4. Convert DTOs to Domain Dict - let mut entries_map = HashMap::new(); - for dto in dtos { - let entry: DictEntry = dto.into(); // Converts DTO -> Entity - - // We safely unwrap because the DB guarantees an ID exists - if let Some(id) = entry.id { - entries_map.insert(id, entry); - } - } - - Ok(Dict { - name: self.dict_name.clone(), - entries: entries_map, - }) - } - - async fn stream_batches( - &self, - batch_size: usize, - ) -> Result, RepositoryError>>, RepositoryError> { - // 1. Resolve ID first - let dict_id = self.get_dict_id().await?; - - // 2. Create the base query stream. - // We do NOT use limit/offset. We let the DB stream rows via a cursor. - let query_stream = sqlx::query("SELECT text FROM entries WHERE dictionary_id = ?") - .bind(dict_id) - .fetch(&self.pool); - - // 3. Transform the stream using Functional combinators - let stream = query_stream - // Map SQLx errors to Domain errors - .map_err(|e| RepositoryError::StorageError(e.to_string())) - // Extract the String from the Row - .and_then(|row| async move { - // 'text' is the column name - let text: String = row - .try_get("text") - .map_err(|e| RepositoryError::StorageError(e.to_string()))?; - Ok(text) - }) - // Group items into vectors of size `batch_size` - .try_chunks(batch_size) - // try_chunks returns a specific error type on failure, map it back - .map_err(|e| { - // logic to handle leftover elements if error occurs, - // but for simplicity, we treat stream errors as fatal here - RepositoryError::StorageError(e.to_string()) - }); - - // 4. Box the stream to erase the complex iterator type (Type Erasure) - Ok(Box::pin(stream)) - } -} diff --git a/lib/src/sys_major/removeme.old/presentation.rs b/lib/src/sys_major/removeme.old/presentation.rs deleted file mode 100644 index 74f47ad..0000000 --- a/lib/src/sys_major/removeme.old/presentation.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod server;