|
|
|
|
@ -20,11 +20,14 @@ const DEFAULT_DICT_BATCH_SIZE: usize = 100;
|
|
|
|
|
|
|
|
|
|
#[derive(Error, Debug)] |
|
|
|
|
pub enum LenValueMapError { |
|
|
|
|
#[error("Value parsing error: {0}")] |
|
|
|
|
#[error("value parsing error: {0}")] |
|
|
|
|
Parse(#[from] ParseIntError), |
|
|
|
|
|
|
|
|
|
#[error(transparent)] |
|
|
|
|
Codec(#[from] CodecError), |
|
|
|
|
|
|
|
|
|
#[error("unable to build encoder data: {0}")] |
|
|
|
|
Build(String), |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// Alias for `u64`.
/// NOTE(review): presumably the numeric value a word decodes to — confirm against its uses.
type DecodedNumber = u64; |
|
|
|
|
@ -72,12 +75,42 @@ impl LenValueMap {
|
|
|
|
|
Ok(()) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
pub async fn from_dict(decoder: &impl SystemDecoder, repo: &impl DictRepository) -> Self { |
|
|
|
|
Self::build(decoder, repo, DEFAULT_DICT_BATCH_SIZE) |
|
|
|
|
pub async fn from_dict( |
|
|
|
|
decoder: &impl SystemDecoder, |
|
|
|
|
repo: &impl DictRepository, |
|
|
|
|
) -> Result<Self, LenValueMapError> { |
|
|
|
|
Self::build(decoder, repo, DEFAULT_DICT_BATCH_SIZE).await |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
async fn build( |
|
|
|
|
decoder: &impl SystemDecoder, |
|
|
|
|
repo: &impl DictRepository, |
|
|
|
|
batch_size: usize, |
|
|
|
|
) -> Result<Self, LenValueMapError> { |
|
|
|
|
let mut map = LenValueMap::new(); |
|
|
|
|
let mut offset = 0; |
|
|
|
|
const MAX_OFFSET: usize = 10_000_000; |
|
|
|
|
|
|
|
|
|
loop { |
|
|
|
|
let dict = repo |
|
|
|
|
.fetch_many(batch_size, offset) |
|
|
|
|
.await |
|
|
|
|
.map_err(|e| LenValueMapError::Build(e.to_string()))?; |
|
|
|
|
|
|
|
|
|
if dict.entries.is_empty() { |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
let words: Vec<String> = dict.entries.into_iter().map(|entry| entry.1.text).collect(); |
|
|
|
|
map.insert_words(words, decoder)?; |
|
|
|
|
|
|
|
|
|
offset += batch_size; |
|
|
|
|
if offset >= MAX_OFFSET { |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
fn build(decoder: &impl SystemDecoder, repo: &impl DictRepository, batch_size: usize) -> Self { |
|
|
|
|
todo!() |
|
|
|
|
Ok(map) |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
@ -85,9 +118,10 @@ impl LenValueMap {
|
|
|
|
|
mod tests { |
|
|
|
|
use super::*; |
|
|
|
|
use crate::core::{entities::*, errors::*}; |
|
|
|
|
use async_trait::async_trait; |
|
|
|
|
use std::collections::HashMap; |
|
|
|
|
|
|
|
|
|
use mockall::automock; |
|
|
|
|
use mockall::{Sequence, automock}; |
|
|
|
|
use mockall::{mock, predicate::*}; |
|
|
|
|
|
|
|
|
|
const TEST_WORD_1: &str = "test_word_1"; |
|
|
|
|
@ -193,7 +227,138 @@ mod tests {
|
|
|
|
|
|
|
|
|
|
let words = l4.get(&TEST_NUM_3).unwrap(); |
|
|
|
|
assert_eq!(words.len(), 2); |
|
|
|
|
assert_eq!(words[0], TEST_WORD_3); |
|
|
|
|
assert_eq!(words[1], TEST_WORD_4); |
|
|
|
|
assert!(words.contains(&TEST_WORD_3.to_string())); |
|
|
|
|
assert!(words.contains(&TEST_WORD_4.to_string())); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// `insert_words` must report an error when the decoder fails.
#[test]
fn test_decoder_error_propagates() {
    // A decoder whose every `decode` call fails.
    let mut failing_decoder = MockDecoder::new();
    failing_decoder
        .expect_decode()
        .returning(|_| Err(CodecError::UnexpectedError("boom".into())));

    let mut target = LenValueMap::new();
    let outcome = target.insert_words(vec!["x".into()], &failing_decoder);

    assert!(outcome.is_err());
}
|
|
|
|
|
|
|
|
|
// --- build ---
|
|
|
|
|
|
|
|
|
|
mock! { |
|
|
|
|
pub Repo {} |
|
|
|
|
|
|
|
|
|
#[async_trait] |
|
|
|
|
impl DictRepository for Repo { |
|
|
|
|
async fn create_dict(&self) -> Result<(), RepositoryError>; |
|
|
|
|
fn use_dict(&mut self, _name: &str); |
|
|
|
|
async fn save_entries(&self, _entry: &[DictEntry]) -> Result<(), RepositoryError>; |
|
|
|
|
async fn fetch_many( |
|
|
|
|
&self, |
|
|
|
|
limit: usize, |
|
|
|
|
offset: usize, |
|
|
|
|
) -> Result<Dict, RepositoryError>; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
fn dict_with_words(words: &[&str]) -> Dict { |
|
|
|
|
let mut dict = Dict::new("default".into()); |
|
|
|
|
|
|
|
|
|
for (i, word) in words.iter().enumerate() { |
|
|
|
|
dict.add_entry(DictEntry::new(Some(i as u64), word.to_string())); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
dict |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#[tokio::test] |
|
|
|
|
async fn test_build_single_batch() { |
|
|
|
|
let mut repo = MockRepo::new(); |
|
|
|
|
let mut decoder = MockDecoder::new(); |
|
|
|
|
let mut seq = Sequence::new(); |
|
|
|
|
|
|
|
|
|
decoder |
|
|
|
|
.expect_decode() |
|
|
|
|
.returning(|word| mock_decoding(word)); |
|
|
|
|
|
|
|
|
|
// FIRST CALL expectation (will be called first)
|
|
|
|
|
repo.expect_fetch_many() |
|
|
|
|
.times(1) // Explicitly expect 1 call
|
|
|
|
|
.in_sequence(&mut seq) // Enforce order
|
|
|
|
|
.returning(|_, _| Ok(dict_with_words(&[TEST_WORD_1]))); |
|
|
|
|
|
|
|
|
|
// SECOND CALL expectation (will be called second)
|
|
|
|
|
repo.expect_fetch_many() |
|
|
|
|
.times(1) |
|
|
|
|
.in_sequence(&mut seq) |
|
|
|
|
.returning(|_, _| Ok(Dict::new("default_dict".into()))); |
|
|
|
|
|
|
|
|
|
let data = LenValueMap::build(&decoder, &repo, 1) |
|
|
|
|
.await |
|
|
|
|
.unwrap() |
|
|
|
|
.into_data(); |
|
|
|
|
|
|
|
|
|
assert_eq!(data.len(), 1); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#[tokio::test] |
|
|
|
|
async fn test_build_multiple_batches() { |
|
|
|
|
let mut repo = MockRepo::new(); |
|
|
|
|
let mut decoder = MockDecoder::new(); |
|
|
|
|
let mut seq = Sequence::new(); |
|
|
|
|
|
|
|
|
|
decoder |
|
|
|
|
.expect_decode() |
|
|
|
|
.returning(|word| mock_decoding(word)); |
|
|
|
|
|
|
|
|
|
repo.expect_fetch_many() |
|
|
|
|
.times(1) // Explicitly expect 1 call
|
|
|
|
|
.in_sequence(&mut seq) // Enforce order
|
|
|
|
|
.returning(|_, _| Ok(dict_with_words(&[TEST_WORD_1]))); |
|
|
|
|
|
|
|
|
|
repo.expect_fetch_many() |
|
|
|
|
.times(1) // Explicitly expect 1 call
|
|
|
|
|
.in_sequence(&mut seq) // Enforce order
|
|
|
|
|
.returning(|_, _| Ok(dict_with_words(&[TEST_WORD_3]))); // word with different decoded length
|
|
|
|
|
|
|
|
|
|
repo.expect_fetch_many() |
|
|
|
|
.times(1) |
|
|
|
|
.in_sequence(&mut seq) |
|
|
|
|
.returning(|_, _| Ok(Dict::new("default_dict".into()))); |
|
|
|
|
|
|
|
|
|
let data = LenValueMap::build(&decoder, &repo, 1) |
|
|
|
|
.await |
|
|
|
|
.unwrap() |
|
|
|
|
.into_data(); |
|
|
|
|
|
|
|
|
|
assert_eq!(data.len(), 2); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// #[tokio::test]
|
|
|
|
|
// async fn test_build_fetches_multiple_batches() {
|
|
|
|
|
// let mut repo = MockRepo::new();
|
|
|
|
|
// let mut decoder = MockDecoder::new();
|
|
|
|
|
|
|
|
|
|
// decoder
|
|
|
|
|
// .expect_decode()
|
|
|
|
|
// .returning(|word| mock_decoding(word));
|
|
|
|
|
|
|
|
|
|
// repo.expect_fetch_many()
|
|
|
|
|
// .with(eq("default"), eq(Some(2)), eq(Some(0)))
|
|
|
|
|
// .return_once(|_, _, _| Ok(dict_with_words(&[TEST_WORD_1, TEST_WORD_2])));
|
|
|
|
|
|
|
|
|
|
// repo.expect_fetch_many()
|
|
|
|
|
// .with(eq("default"), eq(Some(2)), eq(Some(2)))
|
|
|
|
|
// .return_once(|_, _, _| Ok(dict_with_words(&[TEST_WORD_3, TEST_WORD_4])));
|
|
|
|
|
|
|
|
|
|
// repo.expect_fetch_many()
|
|
|
|
|
// .with(eq("default"), eq(Some(2)), eq(Some(4)))
|
|
|
|
|
// .return_once(|_, _, _| Ok(Dict::new("default".into())));
|
|
|
|
|
|
|
|
|
|
// let map = LenValueMap::build(&decoder, &repo, 2).await;
|
|
|
|
|
// let data = map.into_data();
|
|
|
|
|
|
|
|
|
|
// assert_eq!(data.len(), 2);
|
|
|
|
|
// }
|
|
|
|
|
} |
|
|
|
|
|