diff --git a/lib/src/application/services.rs b/lib/src/application/services.rs index 2085fe2..bb11743 100644 --- a/lib/src/application/services.rs +++ b/lib/src/application/services.rs @@ -19,7 +19,7 @@ impl<'a, R: DictRepository> DictImporter<'a, R> { mut source: impl DictSource, ) -> Result<(), anyhow::Error> { // 1. Ensure Dict exists (Logic: Create if new, or maybe clear existing?) - self.repo.create(name).await?; + self.repo.create_dict(name).await?; let mut batch = Vec::with_capacity(self.batch_size); diff --git a/lib/src/core/entities.rs b/lib/src/core/entities.rs index 3e67db4..f670357 100644 --- a/lib/src/core/entities.rs +++ b/lib/src/core/entities.rs @@ -19,6 +19,7 @@ impl DictEntry { } } +#[derive(Debug, Clone)] pub struct Dict { pub name: String, pub entries: HashMap, @@ -37,11 +38,19 @@ impl Dict { } } -// pub struct DecodedItem { -// pub value: String, -// } +#[derive(Debug, Clone)] +pub struct EncodedItem { + pub entry: DictEntry, +} -// pub struct DecodedResult { -// pub input: String, -// pub output: Vec, -// } +impl EncodedItem { + pub fn value(&self) -> &str { + &self.entry.text + } +} + +#[derive(Debug, Clone)] +pub struct EncodingResult { + pub input: String, + pub output: Vec, +} diff --git a/lib/src/core/errors.rs b/lib/src/core/errors.rs index 573a66f..1c9dbe5 100644 --- a/lib/src/core/errors.rs +++ b/lib/src/core/errors.rs @@ -9,3 +9,11 @@ pub enum RepositoryError { #[error("Storage error: {0}")] StorageError(String), } + +#[derive(Error, Debug)] +pub enum EncoderError { + #[error("Encoder initialization failed")] + InitializationFailed, + #[error("Unexpected error: {0}")] + UnexpectedError(String), +} diff --git a/lib/src/core/sys_major.rs b/lib/src/core/sys_major.rs index ae82843..41402c1 100644 --- a/lib/src/core/sys_major.rs +++ b/lib/src/core/sys_major.rs @@ -1,4 +1,6 @@ mod decoder; +mod encoder; +mod lvmap; pub mod rules_en; pub mod rules_pl; @@ -6,3 +8,5 @@ pub mod rules_pl; mod decoder_tests; pub use decoder::*; +pub use encoder::*; +pub use lvmap::LenValueMap; diff --git a/lib/src/core/sys_major/encoder.rs b/lib/src/core/sys_major/encoder.rs new file mode 100644 index 0000000..2ff07a4 --- /dev/null +++ b/lib/src/core/sys_major/encoder.rs @@ -0,0 +1,23 @@ +use crate::core::{ + entities::EncodingResult, errors::EncoderError, sys_major::LenValueMap, traits::*, +}; + +#[derive(Debug)] +pub struct Encoder { + lv_map: LenValueMap, +} + +impl Encoder { + pub fn new(lv_map: LenValueMap) -> Self { + Encoder { lv_map } + } +} + +impl SystemEncoder for Encoder { + fn initialize(&self) -> Result<(), EncoderError> { + Ok(()) + } + fn encode(&self, word: &str) -> Result { + todo!() + } +} diff --git a/lib/src/core/sys_major/lvmap.rs b/lib/src/core/sys_major/lvmap.rs new file mode 100644 index 0000000..e418025 --- /dev/null +++ b/lib/src/core/sys_major/lvmap.rs @@ -0,0 +1,197 @@ +use std::collections::HashMap; + +use crate::core::{DictRepository, SystemEncoder}; + +// We store words by encoded number length, then encoded value +// Example: +// root: +// - 3: +// - 750: +// - word: klasa +// - word: gilza +// - 849: +// - word: farba +// - 2: +// - 45: +// - word: oral + +const DEFAULT_DICT_BATCH_SIZE: usize = 100; + +type ValueLength = u8; +type Value = u64; +pub type LenValueData = HashMap>>; + +#[derive(Debug)] +pub struct LenValueMap { + map: LenValueData, + batch_size: usize, +} + +impl LenValueMap { + pub fn new() -> Self { + LenValueMap { + map: LenValueData::new(), + batch_size: DEFAULT_DICT_BATCH_SIZE, + } + } + pub fn with_data(mut self, data: LenValueData) -> Self { + self.map = data; + self + } + + pub fn with_batch_size(mut self, batch_size: usize) -> Self { + self.batch_size = batch_size; + self + } + + pub fn build(encoder: &E, repo: &R) -> Self { + let mut map = LenValueData::new(); + + LenValueMap::new().with_data(map) + } + + pub fn is_empty(&self) -> bool { + self.map.is_empty() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::{entities::*, errors::*}; + use std::collections::HashMap; + + #[test] + fn test_no_inpout_then_empty_map() { + let repo = MockRepository::new().with_single_word_dict(); + let encoder = MockEncoder::new(EncodingResult { + input: "".to_string(), + output: vec![], + }); + let lv_map = LenValueMap::build(&encoder, &repo); + assert!(lv_map.is_empty()); + } + + // #[test] + // fn test_empty_dict_then_no_output() { + // let dict = Dict { + // name: "test".to_string(), + // entries: HashMap::new(), + // }; + // let repo = MockRepository::new(dict); + // let encoder = Encoder::new(&repo); + // let result = encoder.encode("test").unwrap(); + // assert!(result.output.is_empty()); + // } + + // #[test] + // fn test_encode_single_digit() { + // let dict = get_single_word_dict(); + // let repo = MockRepository::new(dict); + // let encoder = Encoder::new(&repo).with_batch_size(1); + // let result = encoder.encode("test").unwrap(); + // assert!(result.output.is_empty()); + // } + + // ---------- Helpers ---------- + + fn get_single_word_dict() -> Dict { + Dict { + name: "test_dict".to_string(), + entries: HashMap::from([( + 1, + DictEntry { + id: Some(1), + text: "test_word_1".to_string(), + metadata: HashMap::new(), + }, + )]), + } + } + + // ---------- Mocks ---------- + + struct MockEncoder { + result: EncodingResult, + } + + impl MockEncoder { + fn new(result: EncodingResult) -> Self { + MockEncoder { result } + } + } + + impl SystemEncoder for MockEncoder { + fn initialize(&self) -> Result<(), EncoderError> { + Ok(()) + } + + fn encode(&self, _word: &str) -> Result { + Ok(self.result.clone()) + } + } + + struct MockRepository { + dict: Dict, + } + + impl MockRepository { + pub fn new() -> Self { + MockRepository { + dict: Dict::new("test_dict".to_string()), + } + } + + pub fn with_single_word_dict(mut self) -> Self { + self.dict = get_single_word_dict(); + self + } + } + + #[async_trait::async_trait] + impl DictRepository for MockRepository { + async fn create_dict(&self, _name: &str) -> Result<(), RepositoryError> { + Ok(()) + } + + async fn save_entries( + &self, + _dict_name: &str, + _entries: &[DictEntry], + ) -> Result<(), RepositoryError> { + todo!() + } + + async fn fetch_many( + &self, + _name: &str, + limit: Option, + offset: Option, + ) -> Result { + let offset = offset.unwrap_or(0) as usize; + let limit = limit.unwrap_or(u32::MAX) as usize; + + let mut entries_vec: Vec<_> = self.dict.entries.iter().collect(); + entries_vec.sort_by_key(|&(id, _)| *id); + + let paginated = entries_vec.into_iter().skip(offset).take(limit); + let paginated_map: HashMap = paginated + .map(|(id, entry)| { + ( + *id, + DictEntry { + id: entry.id, + text: entry.text.clone(), + metadata: entry.metadata.clone(), + }, + ) + }) + .collect(); + + Ok(Dict { + name: self.dict.name.clone(), + entries: paginated_map, + }) + } + } +} diff --git a/lib/src/core/traits.rs b/lib/src/core/traits.rs index 8c81693..f06cbfa 100644 --- a/lib/src/core/traits.rs +++ b/lib/src/core/traits.rs @@ -1,18 +1,27 @@ +use crate::core::entities::EncodingResult; +use crate::core::errors::EncoderError; + use super::entities::{Dict, DictEntry}; use super::errors::RepositoryError; +/// The number value can be encoded as many word sets, +/// but decoded as one number. For partial values, we can use +/// u64, but for the whole decoded value that may be very long, +/// we need a string. +type DecodedValue = String; + pub trait SystemDecoder { - fn decode(&self, word: &str) -> String; + fn decode(&self, word: &str) -> DecodedValue; } -// pub trait SystenDecoder { -// fn initialize(&self) -> Result<(), anyhow::Error>; - -// } +pub trait SystemEncoder { + fn initialize(&self) -> Result<(), EncoderError>; + fn encode(&self, word: &str) -> Result; +} #[async_trait::async_trait] pub trait DictRepository: Send + Sync { - async fn create(&self, name: &str) -> Result<(), RepositoryError>; + async fn create_dict(&self, name: &str) -> Result<(), RepositoryError>; /// "Upsert" logic: /// - If entry exists (by text), update metadata. diff --git a/lib/src/infrastructure/sqlite_dict_repository.rs b/lib/src/infrastructure/sqlite_dict_repository.rs index aef5ea5..71f5134 100644 --- a/lib/src/infrastructure/sqlite_dict_repository.rs +++ b/lib/src/infrastructure/sqlite_dict_repository.rs @@ -87,7 +87,7 @@ impl SqliteDictRepository { #[async_trait::async_trait] impl DictRepository for SqliteDictRepository { - async fn create(&self, name: &str) -> Result<(), RepositoryError> { + async fn create_dict(&self, name: &str) -> Result<(), RepositoryError> { sqlx::query("INSERT OR IGNORE INTO dictionaries (name) VALUES (?)") .bind(name) .execute(&self.pool)