diff --git a/lib/src/application/services.rs b/lib/src/application/services.rs index bb11743..84fba4d 100644 --- a/lib/src/application/services.rs +++ b/lib/src/application/services.rs @@ -13,13 +13,9 @@ impl<'a, R: DictRepository> DictImporter<'a, R> { } } - pub async fn import( - &self, - name: &str, - mut source: impl DictSource, - ) -> Result<(), anyhow::Error> { + pub async fn import(&self, mut source: impl DictSource) -> Result<(), anyhow::Error> { // 1. Ensure Dict exists (Logic: Create if new, or maybe clear existing?) - self.repo.create_dict(name).await?; + self.repo.create_dict().await?; let mut batch = Vec::with_capacity(self.batch_size); @@ -34,7 +30,7 @@ impl<'a, R: DictRepository> DictImporter<'a, R> { // 3. Batch Write if batch.len() >= self.batch_size { - self.repo.save_entries(name, &batch).await?; + self.repo.save_entries(&batch).await?; batch.clear(); } } @@ -48,7 +44,7 @@ impl<'a, R: DictRepository> DictImporter<'a, R> { // 4. Flush remaining if !batch.is_empty() { - self.repo.save_entries(name, &batch).await?; + self.repo.save_entries(&batch).await?; } Ok(()) diff --git a/lib/src/core/sys_major/lvmap.rs b/lib/src/core/sys_major/lvmap.rs index 74ace5c..2ec83fc 100644 --- a/lib/src/core/sys_major/lvmap.rs +++ b/lib/src/core/sys_major/lvmap.rs @@ -20,11 +20,14 @@ const DEFAULT_DICT_BATCH_SIZE: usize = 100; #[derive(Error, Debug)] pub enum LenValueMapError { - #[error("Value parsing error: {0}")] + #[error("value parsing error: {0}")] Parse(#[from] ParseIntError), #[error(transparent)] Codec(#[from] CodecError), + + #[error("unable to build encoder data: {0}")] + Build(String), } type DecodedNumber = u64; @@ -72,12 +75,42 @@ impl LenValueMap { Ok(()) } - pub async fn from_dict(decoder: &impl SystemDecoder, repo: &impl DictRepository) -> Self { - Self::build(decoder, repo, DEFAULT_DICT_BATCH_SIZE) + pub async fn from_dict( + decoder: &impl SystemDecoder, + repo: &impl DictRepository, + ) -> Result { + Self::build(decoder, repo, DEFAULT_DICT_BATCH_SIZE).await } - fn build(decoder: &impl SystemDecoder, repo: &impl DictRepository, batch_size: usize) -> Self { - todo!() + async fn build( + decoder: &impl SystemDecoder, + repo: &impl DictRepository, + batch_size: usize, + ) -> Result { + let mut map = LenValueMap::new(); + let mut offset = 0; + const MAX_OFFSET: usize = 10_000_000; + + loop { + let dict = repo + .fetch_many(batch_size, offset) + .await + .map_err(|e| LenValueMapError::Build(e.to_string()))?; + + if dict.entries.is_empty() { + break; + } + + let words: Vec = dict.entries.into_iter().map(|entry| entry.1.text).collect(); + map.insert_words(words, decoder)?; + + offset += batch_size; + if offset >= MAX_OFFSET { + break; + } + } + + Ok(map) } } @@ -85,9 +118,10 @@ impl LenValueMap { mod tests { use super::*; use crate::core::{entities::*, errors::*}; + use async_trait::async_trait; use std::collections::HashMap; - use mockall::automock; + use mockall::{Sequence, automock}; use mockall::{mock, predicate::*}; const TEST_WORD_1: &str = "test_word_1"; @@ -193,7 +227,138 @@ mod tests { let words = l4.get(&TEST_NUM_3).unwrap(); assert_eq!(words.len(), 2); - assert_eq!(words[0], TEST_WORD_3); - assert_eq!(words[1], TEST_WORD_4); + assert!(words.contains(&TEST_WORD_3.to_string())); + assert!(words.contains(&TEST_WORD_4.to_string())); + } + + #[test] + fn test_decoder_error_propagates() { + let mut decoder = MockDecoder::new(); + decoder + .expect_decode() + .returning(|_| Err(CodecError::UnexpectedError("boom".into()))); + + let mut map = LenValueMap::new(); + let result = map.insert_words(vec!["x".into()], &decoder); + + assert!(result.is_err()); + } + + // --- build --- + + mock! { + pub Repo {} + + #[async_trait] + impl DictRepository for Repo { + async fn create_dict(&self) -> Result<(), RepositoryError>; + fn use_dict(&mut self, _name: &str); + async fn save_entries(&self, _entry: &[DictEntry]) -> Result<(), RepositoryError>; + async fn fetch_many( + &self, + limit: usize, + offset: usize, + ) -> Result; + } + } + + fn dict_with_words(words: &[&str]) -> Dict { + let mut dict = Dict::new("default".into()); + + for (i, word) in words.iter().enumerate() { + dict.add_entry(DictEntry::new(Some(i as u64), word.to_string())); + } + + dict + } + + #[tokio::test] + async fn test_build_single_batch() { + let mut repo = MockRepo::new(); + let mut decoder = MockDecoder::new(); + let mut seq = Sequence::new(); + + decoder + .expect_decode() + .returning(|word| mock_decoding(word)); + + // FIRST CALL expectation (will be called first) + repo.expect_fetch_many() + .times(1) // Explicitly expect 1 call + .in_sequence(&mut seq) // Enforce order + .returning(|_, _| Ok(dict_with_words(&[TEST_WORD_1]))); + + // SECOND CALL expectation (will be called second) + repo.expect_fetch_many() + .times(1) + .in_sequence(&mut seq) + .returning(|_, _| Ok(Dict::new("default_dict".into()))); + + let data = LenValueMap::build(&decoder, &repo, 1) + .await + .unwrap() + .into_data(); + + assert_eq!(data.len(), 1); + } + + #[tokio::test] + async fn test_build_multiple_batches() { + let mut repo = MockRepo::new(); + let mut decoder = MockDecoder::new(); + let mut seq = Sequence::new(); + + decoder + .expect_decode() + .returning(|word| mock_decoding(word)); + + repo.expect_fetch_many() + .times(1) // Explicitly expect 1 call + .in_sequence(&mut seq) // Enforce order + .returning(|_, _| Ok(dict_with_words(&[TEST_WORD_1]))); + + repo.expect_fetch_many() + .times(1) // Explicitly expect 1 call + .in_sequence(&mut seq) // Enforce order + .returning(|_, _| Ok(dict_with_words(&[TEST_WORD_3]))); // word with different decoded length + + repo.expect_fetch_many() + .times(1) + .in_sequence(&mut seq) + .returning(|_, _| Ok(Dict::new("default_dict".into()))); + + let data = LenValueMap::build(&decoder, &repo, 1) + .await + .unwrap() + .into_data(); + + assert_eq!(data.len(), 2); } + + // #[tokio::test] + // async fn test_build_fetches_multiple_batches() { + // let mut repo = MockRepo::new(); + // let mut decoder = MockDecoder::new(); + + // decoder + // .expect_decode() + // .returning(|word| mock_decoding(word)); + + // repo.expect_fetch_many() + // .with(eq("default"), eq(Some(2)), eq(Some(0))) + // .return_once(|_, _, _| Ok(dict_with_words(&[TEST_WORD_1, TEST_WORD_2]))); + + // repo.expect_fetch_many() + // .with(eq("default"), eq(Some(2)), eq(Some(2))) + // .return_once(|_, _, _| Ok(dict_with_words(&[TEST_WORD_3, TEST_WORD_4]))); + + // repo.expect_fetch_many() + // .with(eq("default"), eq(Some(2)), eq(Some(4))) + // .return_once(|_, _, _| Ok(Dict::new("default".into()))); + + // let map = LenValueMap::build(&decoder, &repo, 2).await; + // let data = map.into_data(); + + // assert_eq!(data.len(), 2); + // } } diff --git a/lib/src/core/traits.rs b/lib/src/core/traits.rs index 673403b..34d56d6 100644 --- a/lib/src/core/traits.rs +++ b/lib/src/core/traits.rs @@ -15,25 +15,17 @@ pub trait SystemEncoder { #[async_trait::async_trait] pub trait DictRepository: Send + Sync { - async fn create_dict(&self, name: &str) -> Result<(), RepositoryError>; + fn use_dict(&mut self, name: &str); + async fn create_dict(&self) -> Result<(), RepositoryError>; /// "Upsert" logic: /// - If entry exists (by text), update metadata. /// - If not, insert new. /// - IDs are handled by the Database. - async fn save_entries( - &self, - dict_name: &str, - entries: &[DictEntry], - ) -> Result<(), RepositoryError>; + async fn save_entries(&self, entries: &[DictEntry]) -> Result<(), RepositoryError>; /// Fetch a page of entries. - async fn fetch_many( - &self, - name: &str, - limit: Option, - offset: Option, - ) -> Result; + async fn fetch_many(&self, limit: usize, offset: usize) -> Result; } pub trait DictSource { diff --git a/lib/src/infrastructure/sqlite_dict_repository.rs b/lib/src/infrastructure/sqlite_dict_repository.rs index 71f5134..08b042a 100644 --- a/lib/src/infrastructure/sqlite_dict_repository.rs +++ b/lib/src/infrastructure/sqlite_dict_repository.rs @@ -30,6 +30,7 @@ impl From for DictEntry { #[derive(Clone)] pub struct SqliteDictRepository { pool: SqlitePool, + dict_name: String, } impl SqliteDictRepository { @@ -67,40 +68,43 @@ impl SqliteDictRepository { .await .map_err(|e| RepositoryError::StorageError(e.to_string()))?; - Ok(Self { pool }) + Ok(Self { + pool: pool, + dict_name: "default_dict".into(), + }) } // Helper: Resolve dictionary name to ID - async fn get_dict_id(&self, name: &str) -> Result { + async fn get_dict_id(&self) -> Result { let row = sqlx::query("SELECT id FROM dictionaries WHERE name = ?") - .bind(name) + .bind(&self.dict_name) .fetch_optional(&self.pool) .await .map_err(|e| RepositoryError::StorageError(e.to_string()))?; match row { Some(r) => Ok(r.get("id")), - None => Err(RepositoryError::NotFound(name.to_string())), + None => Err(RepositoryError::NotFound(self.dict_name.clone())), } } } #[async_trait::async_trait] impl DictRepository for SqliteDictRepository { - async fn create_dict(&self, name: &str) -> Result<(), RepositoryError> { + async fn create_dict(&self) -> Result<(), RepositoryError> { sqlx::query("INSERT OR IGNORE INTO dictionaries (name) VALUES (?)") - .bind(name) + .bind(&self.dict_name) .execute(&self.pool) .await .map_err(|e| RepositoryError::StorageError(e.to_string()))?; Ok(()) } - async fn save_entries( - &self, - dict_name: &str, - entries: &[DictEntry], - ) -> Result<(), RepositoryError> { + fn use_dict(&mut self, name: &str) { + self.dict_name = name.to_string(); + } + + async fn save_entries(&self, entries: &[DictEntry]) -> Result<(), RepositoryError> { let mut tx = self .pool .begin() @@ -109,14 +113,14 @@ impl DictRepository for SqliteDictRepository { // 1. Get Dict ID let dict_id_row = sqlx::query("SELECT id FROM dictionaries WHERE name = ?") - .bind(dict_name) + .bind(&self.dict_name) .fetch_optional(&mut *tx) .await .map_err(|e| RepositoryError::StorageError(e.to_string()))?; let dict_id: i64 = match dict_id_row { Some(row) => row.get("id"), - None => return Err(RepositoryError::NotFound(dict_name.to_string())), + None => return Err(RepositoryError::NotFound(self.dict_name.clone())), }; // 2. Batch Upsert @@ -147,20 +151,11 @@ impl DictRepository for SqliteDictRepository { Ok(()) } - async fn fetch_many( - &self, - name: &str, - limit: Option, - offset: Option, - ) -> Result { - // 1. Get Dict ID - let dict_id = self.get_dict_id(name).await?; - - // 2. Prepare Limits - let limit_val = limit.unwrap_or(1000); - let offset_val = offset.unwrap_or(0); + async fn fetch_many(&self, limit: usize, offset: usize) -> Result { + // Get Dict ID + let dict_id = self.get_dict_id().await?; - // 3. Query (Reading into the DTO) + // Query (Reading into the DTO) let dtos = sqlx::query_as::<_, SqliteEntryDto>( r#" SELECT id, text, metadata @@ -170,8 +165,8 @@ impl DictRepository for SqliteDictRepository { "#, ) .bind(dict_id) - .bind(limit_val) - .bind(offset_val) + .bind(limit as u32) + .bind(offset as u32) .fetch_all(&self.pool) .await .map_err(|e| RepositoryError::StorageError(e.to_string()))?; @@ -188,7 +183,7 @@ impl DictRepository for SqliteDictRepository { } Ok(Dict { - name: name.to_string(), + name: self.dict_name.clone(), entries: entries_map, }) } diff --git a/lib/src/presentation/cli/commands/import_dict.rs b/lib/src/presentation/cli/commands/import_dict.rs index f5fb519..eebd650 100644 --- a/lib/src/presentation/cli/commands/import_dict.rs +++ b/lib/src/presentation/cli/commands/import_dict.rs @@ -21,7 +21,7 @@ pub async fn run( let importer = DictImporter::new(&repository); // Perform the import (this will call create() first) - match importer.import(&config.name, source).await { + match importer.import(source).await { Ok(()) => { info!("Successfully imported dictionary '{}'", config.name); Ok(())