Browse Source

WIP: lv_map refactor & tests

develop-refactor
chodak166 5 months ago
parent
commit
7e19771353
  1. 12
      lib/src/application/services.rs
  2. 181
      lib/src/core/sys_major/lvmap.rs
  3. 16
      lib/src/core/traits.rs
  4. 53
      lib/src/infrastructure/sqlite_dict_repository.rs
  5. 2
      lib/src/presentation/cli/commands/import_dict.rs

12
lib/src/application/services.rs

@ -13,13 +13,9 @@ impl<'a, R: DictRepository> DictImporter<'a, R> {
}
}
pub async fn import(
&self,
name: &str,
mut source: impl DictSource,
) -> Result<(), anyhow::Error> {
pub async fn import(&self, mut source: impl DictSource) -> Result<(), anyhow::Error> {
// 1. Ensure Dict exists (Logic: Create if new, or maybe clear existing?)
self.repo.create_dict(name).await?;
self.repo.create_dict().await?;
let mut batch = Vec::with_capacity(self.batch_size);
@ -34,7 +30,7 @@ impl<'a, R: DictRepository> DictImporter<'a, R> {
// 3. Batch Write
if batch.len() >= self.batch_size {
self.repo.save_entries(name, &batch).await?;
self.repo.save_entries(&batch).await?;
batch.clear();
}
}
@ -48,7 +44,7 @@ impl<'a, R: DictRepository> DictImporter<'a, R> {
// 4. Flush remaining
if !batch.is_empty() {
self.repo.save_entries(name, &batch).await?;
self.repo.save_entries(&batch).await?;
}
Ok(())

181
lib/src/core/sys_major/lvmap.rs

@ -20,11 +20,14 @@ const DEFAULT_DICT_BATCH_SIZE: usize = 100;
#[derive(Error, Debug)]
pub enum LenValueMapError {
#[error("Value parsing error: {0}")]
#[error("value parsing error: {0}")]
Parse(#[from] ParseIntError),
#[error(transparent)]
Codec(#[from] CodecError),
#[error("unable to build encoder data: {0}")]
Build(String),
}
type DecodedNumber = u64;
@ -72,12 +75,42 @@ impl LenValueMap {
Ok(())
}
pub async fn from_dict(decoder: &impl SystemDecoder, repo: &impl DictRepository) -> Self {
Self::build(decoder, repo, DEFAULT_DICT_BATCH_SIZE)
pub async fn from_dict(
decoder: &impl SystemDecoder,
repo: &impl DictRepository,
) -> Result<Self, LenValueMapError> {
Self::build(decoder, repo, DEFAULT_DICT_BATCH_SIZE).await
}
async fn build(
decoder: &impl SystemDecoder,
repo: &impl DictRepository,
batch_size: usize,
) -> Result<Self, LenValueMapError> {
let mut map = LenValueMap::new();
let mut offset = 0;
const MAX_OFFSET: usize = 10_000_000;
loop {
let dict = repo
.fetch_many(batch_size, offset)
.await
.map_err(|e| LenValueMapError::Build(e.to_string()))?;
if dict.entries.is_empty() {
break;
}
let words: Vec<String> = dict.entries.into_iter().map(|entry| entry.1.text).collect();
map.insert_words(words, decoder)?;
offset += batch_size;
if offset >= MAX_OFFSET {
break;
}
}
fn build(decoder: &impl SystemDecoder, repo: &impl DictRepository, batch_size: usize) -> Self {
todo!()
Ok(map)
}
}
@ -85,9 +118,10 @@ impl LenValueMap {
mod tests {
use super::*;
use crate::core::{entities::*, errors::*};
use async_trait::async_trait;
use std::collections::HashMap;
use mockall::automock;
use mockall::{Sequence, automock};
use mockall::{mock, predicate::*};
const TEST_WORD_1: &str = "test_word_1";
@ -193,7 +227,138 @@ mod tests {
let words = l4.get(&TEST_NUM_3).unwrap();
assert_eq!(words.len(), 2);
assert_eq!(words[0], TEST_WORD_3);
assert_eq!(words[1], TEST_WORD_4);
assert!(words.contains(&TEST_WORD_3.to_string()));
assert!(words.contains(&TEST_WORD_4.to_string()));
}
#[test]
fn test_decoder_error_propagates() {
let mut decoder = MockDecoder::new();
decoder
.expect_decode()
.returning(|_| Err(CodecError::UnexpectedError("boom".into())));
let mut map = LenValueMap::new();
let result = map.insert_words(vec!["x".into()], &decoder);
assert!(result.is_err());
}
// --- build ---
mock! {
pub Repo {}
#[async_trait]
impl DictRepository for Repo {
async fn create_dict(&self) -> Result<(), RepositoryError>;
fn use_dict(&mut self, _name: &str);
async fn save_entries(&self, _entry: &[DictEntry]) -> Result<(), RepositoryError>;
async fn fetch_many(
&self,
limit: usize,
offset: usize,
) -> Result<Dict, RepositoryError>;
}
}
fn dict_with_words(words: &[&str]) -> Dict {
let mut dict = Dict::new("default".into());
for (i, word) in words.iter().enumerate() {
dict.add_entry(DictEntry::new(Some(i as u64), word.to_string()));
}
dict
}
#[tokio::test]
async fn test_build_single_batch() {
let mut repo = MockRepo::new();
let mut decoder = MockDecoder::new();
let mut seq = Sequence::new();
decoder
.expect_decode()
.returning(|word| mock_decoding(word));
// FIRST CALL expectation (will be called first)
repo.expect_fetch_many()
.times(1) // Explicitly expect 1 call
.in_sequence(&mut seq) // Enforce order
.returning(|_, _| Ok(dict_with_words(&[TEST_WORD_1])));
// SECOND CALL expectation (will be called second)
repo.expect_fetch_many()
.times(1)
.in_sequence(&mut seq)
.returning(|_, _| Ok(Dict::new("default_dict".into())));
let data = LenValueMap::build(&decoder, &repo, 1)
.await
.unwrap()
.into_data();
assert_eq!(data.len(), 1);
}
#[tokio::test]
async fn test_build_multiple_batches() {
let mut repo = MockRepo::new();
let mut decoder = MockDecoder::new();
let mut seq = Sequence::new();
decoder
.expect_decode()
.returning(|word| mock_decoding(word));
repo.expect_fetch_many()
.times(1) // Explicitly expect 1 call
.in_sequence(&mut seq) // Enforce order
.returning(|_, _| Ok(dict_with_words(&[TEST_WORD_1])));
repo.expect_fetch_many()
.times(1) // Explicitly expect 1 call
.in_sequence(&mut seq) // Enforce order
.returning(|_, _| Ok(dict_with_words(&[TEST_WORD_3]))); // word with different decoded length
repo.expect_fetch_many()
.times(1)
.in_sequence(&mut seq)
.returning(|_, _| Ok(Dict::new("default_dict".into())));
let data = LenValueMap::build(&decoder, &repo, 1)
.await
.unwrap()
.into_data();
assert_eq!(data.len(), 2);
}
// #[tokio::test]
// async fn test_build_fetches_multiple_batches() {
// let mut repo = MockRepo::new();
// let mut decoder = MockDecoder::new();
// decoder
// .expect_decode()
// .returning(|word| mock_decoding(word));
// repo.expect_fetch_many()
// .with(eq("default"), eq(Some(2)), eq(Some(0)))
// .return_once(|_, _, _| Ok(dict_with_words(&[TEST_WORD_1, TEST_WORD_2])));
// repo.expect_fetch_many()
// .with(eq("default"), eq(Some(2)), eq(Some(2)))
// .return_once(|_, _, _| Ok(dict_with_words(&[TEST_WORD_3, TEST_WORD_4])));
// repo.expect_fetch_many()
// .with(eq("default"), eq(Some(2)), eq(Some(4)))
// .return_once(|_, _, _| Ok(Dict::new("default".into())));
// let map = LenValueMap::build(&decoder, &repo, 2).await;
// let data = map.into_data();
// assert_eq!(data.len(), 2);
// }
}

16
lib/src/core/traits.rs

@ -15,25 +15,17 @@ pub trait SystemEncoder {
#[async_trait::async_trait]
pub trait DictRepository: Send + Sync {
async fn create_dict(&self, name: &str) -> Result<(), RepositoryError>;
fn use_dict(&mut self, name: &str);
async fn create_dict(&self) -> Result<(), RepositoryError>;
/// "Upsert" logic:
/// - If entry exists (by text), update metadata.
/// - If not, insert new.
/// - IDs are handled by the Database.
async fn save_entries(
&self,
dict_name: &str,
entries: &[DictEntry],
) -> Result<(), RepositoryError>;
async fn save_entries(&self, entries: &[DictEntry]) -> Result<(), RepositoryError>;
/// Fetch a page of entries.
async fn fetch_many(
&self,
name: &str,
limit: Option<u32>,
offset: Option<u32>,
) -> Result<Dict, RepositoryError>;
async fn fetch_many(&self, limit: usize, offset: usize) -> Result<Dict, RepositoryError>;
}
pub trait DictSource {

53
lib/src/infrastructure/sqlite_dict_repository.rs

@ -30,6 +30,7 @@ impl From<SqliteEntryDto> for DictEntry {
#[derive(Clone)]
pub struct SqliteDictRepository {
pool: SqlitePool,
dict_name: String,
}
impl SqliteDictRepository {
@ -67,40 +68,43 @@ impl SqliteDictRepository {
.await
.map_err(|e| RepositoryError::StorageError(e.to_string()))?;
Ok(Self { pool })
Ok(Self {
pool: pool,
dict_name: "default_dict".into(),
})
}
// Helper: Resolve dictionary name to ID
async fn get_dict_id(&self, name: &str) -> Result<i64, RepositoryError> {
async fn get_dict_id(&self) -> Result<i64, RepositoryError> {
let row = sqlx::query("SELECT id FROM dictionaries WHERE name = ?")
.bind(name)
.bind(&self.dict_name)
.fetch_optional(&self.pool)
.await
.map_err(|e| RepositoryError::StorageError(e.to_string()))?;
match row {
Some(r) => Ok(r.get("id")),
None => Err(RepositoryError::NotFound(name.to_string())),
None => Err(RepositoryError::NotFound(self.dict_name.clone())),
}
}
}
#[async_trait::async_trait]
impl DictRepository for SqliteDictRepository {
async fn create_dict(&self, name: &str) -> Result<(), RepositoryError> {
async fn create_dict(&self) -> Result<(), RepositoryError> {
sqlx::query("INSERT OR IGNORE INTO dictionaries (name) VALUES (?)")
.bind(name)
.bind(&self.dict_name)
.execute(&self.pool)
.await
.map_err(|e| RepositoryError::StorageError(e.to_string()))?;
Ok(())
}
async fn save_entries(
&self,
dict_name: &str,
entries: &[DictEntry],
) -> Result<(), RepositoryError> {
fn use_dict(&mut self, name: &str) {
self.dict_name = name.to_string();
}
async fn save_entries(&self, entries: &[DictEntry]) -> Result<(), RepositoryError> {
let mut tx = self
.pool
.begin()
@ -109,14 +113,14 @@ impl DictRepository for SqliteDictRepository {
// 1. Get Dict ID
let dict_id_row = sqlx::query("SELECT id FROM dictionaries WHERE name = ?")
.bind(dict_name)
.bind(&self.dict_name)
.fetch_optional(&mut *tx)
.await
.map_err(|e| RepositoryError::StorageError(e.to_string()))?;
let dict_id: i64 = match dict_id_row {
Some(row) => row.get("id"),
None => return Err(RepositoryError::NotFound(dict_name.to_string())),
None => return Err(RepositoryError::NotFound(self.dict_name.clone())),
};
// 2. Batch Upsert
@ -147,20 +151,11 @@ impl DictRepository for SqliteDictRepository {
Ok(())
}
async fn fetch_many(
&self,
name: &str,
limit: Option<u32>,
offset: Option<u32>,
) -> Result<Dict, RepositoryError> {
// 1. Get Dict ID
let dict_id = self.get_dict_id(name).await?;
// 2. Prepare Limits
let limit_val = limit.unwrap_or(1000);
let offset_val = offset.unwrap_or(0);
async fn fetch_many(&self, limit: usize, offset: usize) -> Result<Dict, RepositoryError> {
// Get Dict ID
let dict_id = self.get_dict_id().await?;
// 3. Query (Reading into the DTO)
// Query (Reading into the DTO)
let dtos = sqlx::query_as::<_, SqliteEntryDto>(
r#"
SELECT id, text, metadata
@ -170,8 +165,8 @@ impl DictRepository for SqliteDictRepository {
"#,
)
.bind(dict_id)
.bind(limit_val)
.bind(offset_val)
.bind(limit as u32)
.bind(offset as u32)
.fetch_all(&self.pool)
.await
.map_err(|e| RepositoryError::StorageError(e.to_string()))?;
@ -188,7 +183,7 @@ impl DictRepository for SqliteDictRepository {
}
Ok(Dict {
name: name.to_string(),
name: self.dict_name.clone(),
entries: entries_map,
})
}

2
lib/src/presentation/cli/commands/import_dict.rs

@ -21,7 +21,7 @@ pub async fn run<R: DictRepository>(
let importer = DictImporter::new(&repository);
// Perform the import (this will call create() first)
match importer.import(&config.name, source).await {
match importer.import(source).await {
Ok(()) => {
info!("Successfully imported dictionary '{}'", config.name);
Ok(())

Loading…
Cancel
Save