Browse Source

WIP: lv_map tests

develop-refactor
chodak166 5 months ago
parent
commit
05be6054a2
  1. 12
      example_dict.json
  2. 7
      lib/Cargo.toml
  3. 8
      lib/src/core/errors.rs
  4. 271
      lib/src/core/sys_major/lvmap.rs
  5. 2
      lib/src/core/traits.rs

12
example_dict.json

@ -1,12 +0,0 @@
[
{"word": "hello", "metadata": {"type": "greeting", "language": "english"}},
{"word": "world", "metadata": {"type": "noun", "language": "english"}},
{"word": "rust", "metadata": {"type": "programming_language", "paradigm": "systems"}},
{"word": "programming", "metadata": {"type": "verb", "context": "computing"}},
{"word": "database", "metadata": {"type": "noun", "context": "data_storage"}},
{"word": "sqlite", "metadata": {"type": "database_engine", "features": ["embedded", "sql"]}},
{"word": "json", "metadata": {"type": "data_format", "standard": "RFC 8259"}},
{"word": "import", "metadata": {"type": "verb", "context": "data_operations"}},
{"word": "dictionary", "metadata": {"type": "noun", "context": "reference"}},
{"word": "example", "metadata": {"type": "noun", "usage": "demonstration"}}
]

7
lib/Cargo.toml

@ -14,7 +14,10 @@ anyhow = "1.0"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" serde_json = "1.0"
chrono = { version = "0.4", features = ["serde"] } chrono = { version = "0.4", features = ["serde"] }
thiserror = "1.0" thiserror = "2.0"
async-trait = "0.1" async-trait = "0.1"
parking_lot = "0.12" parking_lot = "0.12"
sqlx = { version = "0.7", features = ["runtime-tokio", "sqlite", "chrono", "migrate"] } sqlx = { version = "0.8.6", features = ["runtime-tokio", "sqlite", "chrono", "migrate"] }
[dev-dependencies]
mockall = "0.14.0"

8
lib/src/core/errors.rs

@ -17,3 +17,11 @@ pub enum EncoderError {
#[error("Unexpected error: {0}")] #[error("Unexpected error: {0}")]
UnexpectedError(String), UnexpectedError(String),
} }
/// Decoder-side counterpart of `EncoderError`.
///
/// `Display` text for each variant comes from its `#[error(...)]` attribute.
#[derive(Error, Debug)]
pub enum DecoderError {
/// Input-related failure; displayed as "Decoder input error" with no further detail.
#[error("Decoder input error")]
InputError,
/// Catch-all failure; the wrapped string is appended to "Unexpected error: ".
#[error("Unexpected error: {0}")]
UnexpectedError(String),
}

271
lib/src/core/sys_major/lvmap.rs

@ -1,6 +1,10 @@
use std::collections::HashMap; use std::collections::HashMap;
use crate::core::{DictRepository, SystemEncoder}; use anyhow::Error;
use crate::core::errors::RepositoryError;
use crate::core::traits::DecodedValue;
use crate::core::{DictRepository, SystemDecoder};
// We store words by encoded number length, then encoded value // We store words by encoded number length, then encoded value
// Example: // Example:
@ -15,6 +19,7 @@ use crate::core::{DictRepository, SystemEncoder};
// - 45: // - 45:
// - word: oral // - word: oral
// Words are fetched from DictRepository in batches
const DEFAULT_DICT_BATCH_SIZE: usize = 100; const DEFAULT_DICT_BATCH_SIZE: usize = 100;
type ValueLength = u8; type ValueLength = u8;
@ -23,35 +28,66 @@ pub type LenValueData = HashMap<ValueLength, HashMap<Value, Vec<String>>>;
#[derive(Debug)] #[derive(Debug)]
pub struct LenValueMap { pub struct LenValueMap {
map: LenValueData, data: LenValueData,
batch_size: usize,
} }
impl LenValueMap { impl LenValueMap {
pub fn new() -> Self { pub fn new() -> Self {
LenValueMap { LenValueMap {
map: LenValueData::new(), data: LenValueData::new(),
batch_size: DEFAULT_DICT_BATCH_SIZE,
} }
} }
pub fn with_data(mut self, data: LenValueData) -> Self { pub fn with_data(mut self, data: LenValueData) -> Self {
self.map = data; self.data = data;
self self
} }
pub fn with_batch_size(mut self, batch_size: usize) -> Self { pub fn data(&self) -> &LenValueData {
self.batch_size = batch_size; &self.data
self
} }
pub fn build<E: SystemEncoder, R: DictRepository>(encoder: &E, repo: &R) -> Self { pub fn into_data(self) -> LenValueData {
let mut map = LenValueData::new(); self.data
}
LenValueMap::new().with_data(map) pub fn insert_words(
&mut self,
words: Vec<String>,
decoder: &impl SystemDecoder,
) -> Result<(), Error> {
for word in words {
if word.is_empty() {
continue;
}
let decoded = decoder.decode(&word);
let int_value = decoded.parse::<u64>()?;
let len = decoded.len() as u8;
if let Some(len_item) = self.data.get_mut(&len) {
if let Some(value_item) = len_item.get_mut(&int_value) {
value_item.push(word);
} else {
len_item.insert(int_value, vec![word]);
}
} else {
self.data
.insert(len, HashMap::from([(int_value, vec![word])]));
}
}
Ok(())
}
/// Creates a map from the words in `repo`, decoded with `decoder`.
///
/// Delegates to `build` with `DEFAULT_DICT_BATCH_SIZE` as the batch size.
///
/// NOTE(review): `build` is currently a `todo!()` stub, so calling this panics.
pub fn from_dict(decoder: &impl SystemDecoder, repo: &impl DictRepository) -> Self {
Self::build(decoder, repo, DEFAULT_DICT_BATCH_SIZE) // TODO(review): confirm that hard-wiring the default batch size here is the right approach
} }
pub fn is_empty(&self) -> bool { pub fn is_empty(&self) -> bool {
self.map.is_empty() self.data.is_empty()
}
/// Private constructor taking an explicit `batch_size`; not implemented yet.
///
/// Presumably meant to read words from `repo` in batches of `batch_size` and
/// decode them with `decoder` — TODO confirm once the body exists.
///
/// # Panics
///
/// Always panics: the body is `todo!()`.
fn build(decoder: &impl SystemDecoder, repo: &impl DictRepository, batch_size: usize) -> Self {
todo!()
} }
} }
@ -61,137 +97,118 @@ mod tests {
use crate::core::{entities::*, errors::*}; use crate::core::{entities::*, errors::*};
use std::collections::HashMap; use std::collections::HashMap;
#[test] use mockall::automock;
fn test_no_inpout_then_empty_map() { use mockall::{mock, predicate::*};
let repo = MockRepository::new().with_single_word_dict();
let encoder = MockEncoder::new(EncodingResult {
input: "".to_string(),
output: vec![],
});
let lv_map = LenValueMap::build(&encoder, &repo);
assert!(lv_map.is_empty());
}
// #[test]
// fn test_empty_dict_then_no_output() {
// let dict = Dict {
// name: "test".to_string(),
// entries: HashMap::new(),
// };
// let repo = MockRepository::new(dict);
// let encoder = Encoder::new(&repo);
// let result = encoder.encode("test").unwrap();
// assert!(result.output.is_empty());
// }
// #[test] const TEST_WORD_1: &str = "test_word_1";
// fn test_encode_single_digit() { const TEST_WORD_2: &str = "test_word_2";
// let dict = get_single_word_dict(); const TEST_WORD_3: &str = "test_word_3";
// let repo = MockRepository::new(dict); const TEST_WORD_4: &str = "test_word_4";
// let encoder = Encoder::new(&repo).with_batch_size(1); const TEST_NUM_1: u64 = 12;
// let result = encoder.encode("test").unwrap(); const TEST_NUM_2: u64 = 34;
// assert!(result.output.is_empty()); const TEST_NUM_3: u64 = 9876;
// } const TEST_NUM_1_LEN: u8 = 2;
const TEST_NUM_3_LEN: u8 = 4;
// ---------- Helpers ---------- fn get_test_dec_map() -> HashMap<String, DecodedValue> {
HashMap::from([
fn get_single_word_dict() -> Dict { (TEST_WORD_1.to_string(), TEST_NUM_1.to_string()),
Dict { (TEST_WORD_2.to_string(), TEST_NUM_2.to_string()),
name: "test_dict".to_string(), (TEST_WORD_3.to_string(), TEST_NUM_3.to_string()),
entries: HashMap::from([( (TEST_WORD_4.to_string(), TEST_NUM_3.to_string()),
1, ])
DictEntry {
id: Some(1),
text: "test_word_1".to_string(),
metadata: HashMap::new(),
},
)]),
}
} }
// ---------- Mocks ---------- fn mock_decoding(word: &str) -> DecodedValue {
get_test_dec_map().remove(word).unwrap()
struct MockEncoder {
result: EncodingResult,
} }
impl MockEncoder { fn get_test_words() -> Vec<String> {
fn new(result: EncodingResult) -> Self { vec![
MockEncoder { result } TEST_WORD_1.to_string(),
} TEST_WORD_2.to_string(),
} TEST_WORD_3.to_string(),
TEST_WORD_4.to_string(),
impl SystemEncoder for MockEncoder { ]
fn initialize(&self) -> Result<(), EncoderError> {
Ok(())
} }
fn encode(&self, _word: &str) -> Result<EncodingResult, EncoderError> { mock! {
Ok(self.result.clone()) pub Decoder {}
impl SystemDecoder for Decoder {
fn decode(&self, word: &str) -> DecodedValue;
} }
} }
struct MockRepository { #[test]
dict: Dict, fn test_insert_words_empty() {
} let words = vec![];
let mut decoder = MockDecoder::new();
decoder.expect_decode().returning(|_| DecodedValue::new());
impl MockRepository { let mut lv_map = LenValueMap::new();
pub fn new() -> Self { lv_map.insert_words(words, &decoder).unwrap();
MockRepository { assert!(lv_map.is_empty());
dict: Dict::new("test_dict".to_string()),
}
} }
pub fn with_single_word_dict(mut self) -> Self { #[test]
self.dict = get_single_word_dict(); fn test_single_word() {
self let words = vec![TEST_WORD_1.to_string()];
} let mut decoder = MockDecoder::new();
} decoder
.expect_decode()
.returning(|word| mock_decoding(word));
#[async_trait::async_trait] let mut lv_map = LenValueMap::new();
impl DictRepository for MockRepository { lv_map.insert_words(words, &decoder).unwrap();
async fn create_dict(&self, _name: &str) -> Result<(), RepositoryError> {
Ok(())
}
async fn save_entries( let data = lv_map.into_data();
&self,
_dict_name: &str,
_entries: &[DictEntry],
) -> Result<(), RepositoryError> {
todo!()
}
async fn fetch_many( assert_eq!(data.len(), 1);
&self, assert!(data.contains_key(&TEST_NUM_1_LEN));
_name: &str, let data = data.get(&TEST_NUM_1_LEN).unwrap();
limit: Option<u32>, assert!(data.contains_key(&TEST_NUM_1));
offset: Option<u32>, let words = data.get(&TEST_NUM_1).unwrap();
) -> Result<Dict, RepositoryError> { assert_eq!(words.len(), 1);
let offset = offset.unwrap_or(0) as usize; assert_eq!(words[0], TEST_WORD_1);
let limit = limit.unwrap_or(u32::MAX) as usize;
let mut entries_vec: Vec<_> = self.dict.entries.iter().collect();
entries_vec.sort_by_key(|&(id, _)| *id);
let paginated = entries_vec.into_iter().skip(offset).take(limit);
let paginated_map: HashMap<u64, DictEntry> = paginated
.map(|(id, entry)| {
(
*id,
DictEntry {
id: entry.id,
text: entry.text.clone(),
metadata: entry.metadata.clone(),
},
)
})
.collect();
Ok(Dict {
name: self.dict.name.clone(),
entries: paginated_map,
})
} }
// Inserts all four fixture words and checks the resulting two-level layout:
// length bucket -> decoded number -> words, with words kept in insertion order.
#[test]
fn test_multiple_words() {
let words = get_test_words();
let mut decoder = MockDecoder::new();
// Every decode call is answered from the fixture word -> number table.
decoder
.expect_decode()
.returning(|word| mock_decoding(word));
let mut lv_map = LenValueMap::new();
lv_map.insert_words(words, &decoder).unwrap();
let data = lv_map.into_data();
assert_eq!(data.len(), 2); // two different lengths
assert!(data.contains_key(&TEST_NUM_1_LEN));
assert!(data.contains_key(&TEST_NUM_3_LEN));
// TEST_NUM_1 and TEST_NUM_2 are both two digits long; TEST_NUM_3 is four digits long.
let l2 = data.get(&TEST_NUM_1_LEN).unwrap();
let l4 = data.get(&TEST_NUM_3_LEN).unwrap();
assert_eq!(l2.len(), 2); // two numbers
assert_eq!(l4.len(), 1); // one number
assert!(l2.contains_key(&TEST_NUM_1));
assert!(l2.contains_key(&TEST_NUM_2));
assert!(l4.contains_key(&TEST_NUM_3));
let words = l2.get(&TEST_NUM_1).unwrap();
assert_eq!(words.len(), 1);
assert_eq!(words[0], TEST_WORD_1);
let words = l2.get(&TEST_NUM_2).unwrap();
assert_eq!(words.len(), 1);
assert_eq!(words[0], TEST_WORD_2);
// TEST_WORD_3 and TEST_WORD_4 both map to TEST_NUM_3, so they share one bucket
// and must appear in the order they were inserted.
let words = l4.get(&TEST_NUM_3).unwrap();
assert_eq!(words.len(), 2);
assert_eq!(words[0], TEST_WORD_3);
assert_eq!(words[1], TEST_WORD_4);
} }
} }

2
lib/src/core/traits.rs

@ -8,7 +8,7 @@ use super::errors::RepositoryError;
/// but decoded as one number. For partial values, we can use /// but decoded as one number. For partial values, we can use
/// u64, but for the whole decoded value that may be very long, /// u64, but for the whole decoded value that may be very long,
/// we need a string. /// we need a string.
type DecodedValue = String; pub type DecodedValue = String;
pub trait SystemDecoder { pub trait SystemDecoder {
fn decode(&self, word: &str) -> DecodedValue; fn decode(&self, word: &str) -> DecodedValue;

Loading…
Cancel
Save