Browse Source

WIP: lv_map tests

develop-refactor
chodak166 5 months ago
parent
commit
05be6054a2
  1. 12
      example_dict.json
  2. 7
      lib/Cargo.toml
  3. 8
      lib/src/core/errors.rs
  4. 279
      lib/src/core/sys_major/lvmap.rs
  5. 2
      lib/src/core/traits.rs

12
example_dict.json

@ -1,12 +0,0 @@
[
{"word": "hello", "metadata": {"type": "greeting", "language": "english"}},
{"word": "world", "metadata": {"type": "noun", "language": "english"}},
{"word": "rust", "metadata": {"type": "programming_language", "paradigm": "systems"}},
{"word": "programming", "metadata": {"type": "verb", "context": "computing"}},
{"word": "database", "metadata": {"type": "noun", "context": "data_storage"}},
{"word": "sqlite", "metadata": {"type": "database_engine", "features": ["embedded", "sql"]}},
{"word": "json", "metadata": {"type": "data_format", "standard": "RFC 8259"}},
{"word": "import", "metadata": {"type": "verb", "context": "data_operations"}},
{"word": "dictionary", "metadata": {"type": "noun", "context": "reference"}},
{"word": "example", "metadata": {"type": "noun", "usage": "demonstration"}}
]

7
lib/Cargo.toml

@ -14,7 +14,10 @@ anyhow = "1.0"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" serde_json = "1.0"
chrono = { version = "0.4", features = ["serde"] } chrono = { version = "0.4", features = ["serde"] }
thiserror = "1.0" thiserror = "2.0"
async-trait = "0.1" async-trait = "0.1"
parking_lot = "0.12" parking_lot = "0.12"
sqlx = { version = "0.7", features = ["runtime-tokio", "sqlite", "chrono", "migrate"] } sqlx = { version = "0.8.6", features = ["runtime-tokio", "sqlite", "chrono", "migrate"] }
[dev-dependencies]
mockall = "0.14.0"

8
lib/src/core/errors.rs

@ -17,3 +17,11 @@ pub enum EncoderError {
#[error("Unexpected error: {0}")] #[error("Unexpected error: {0}")]
UnexpectedError(String), UnexpectedError(String),
} }
/// Error type for the decoding side, declared next to `EncoderError`.
///
/// The `Display` text of each variant comes from its `#[error(...)]` attribute.
// NOTE(review): `Error` here is presumably the `thiserror` derive (the crate is
// listed in lib/Cargo.toml) — confirm against the imports at the top of this file.
#[derive(Error, Debug)]
pub enum DecoderError {
/// Displayed as "Decoder input error"; carries no further detail.
// NOTE(review): presumably meant for input a decoder cannot handle — no
// producer of this variant is visible here, confirm at the call sites.
#[error("Decoder input error")]
InputError,
/// Displayed as "Unexpected error: " followed by the wrapped message string.
#[error("Unexpected error: {0}")]
UnexpectedError(String),
}

279
lib/src/core/sys_major/lvmap.rs

@ -1,6 +1,10 @@
use std::collections::HashMap; use std::collections::HashMap;
use crate::core::{DictRepository, SystemEncoder}; use anyhow::Error;
use crate::core::errors::RepositoryError;
use crate::core::traits::DecodedValue;
use crate::core::{DictRepository, SystemDecoder};
// We store words by encoded number length, then encoded value // We store words by encoded number length, then encoded value
// Example: // Example:
@ -15,6 +19,7 @@ use crate::core::{DictRepository, SystemEncoder};
// - 45: // - 45:
// - word: oral // - word: oral
// Words are fetched from DictRepository in batches
const DEFAULT_DICT_BATCH_SIZE: usize = 100; const DEFAULT_DICT_BATCH_SIZE: usize = 100;
type ValueLength = u8; type ValueLength = u8;
@ -23,35 +28,66 @@ pub type LenValueData = HashMap<ValueLength, HashMap<Value, Vec<String>>>;
#[derive(Debug)] #[derive(Debug)]
pub struct LenValueMap { pub struct LenValueMap {
map: LenValueData, data: LenValueData,
batch_size: usize,
} }
impl LenValueMap { impl LenValueMap {
pub fn new() -> Self { pub fn new() -> Self {
LenValueMap { LenValueMap {
map: LenValueData::new(), data: LenValueData::new(),
batch_size: DEFAULT_DICT_BATCH_SIZE,
} }
} }
pub fn with_data(mut self, data: LenValueData) -> Self { pub fn with_data(mut self, data: LenValueData) -> Self {
self.map = data; self.data = data;
self self
} }
pub fn with_batch_size(mut self, batch_size: usize) -> Self { pub fn data(&self) -> &LenValueData {
self.batch_size = batch_size; &self.data
self }
pub fn into_data(self) -> LenValueData {
self.data
} }
pub fn build<E: SystemEncoder, R: DictRepository>(encoder: &E, repo: &R) -> Self { pub fn insert_words(
let mut map = LenValueData::new(); &mut self,
words: Vec<String>,
decoder: &impl SystemDecoder,
) -> Result<(), Error> {
for word in words {
if word.is_empty() {
continue;
}
let decoded = decoder.decode(&word);
let int_value = decoded.parse::<u64>()?;
let len = decoded.len() as u8;
if let Some(len_item) = self.data.get_mut(&len) {
if let Some(value_item) = len_item.get_mut(&int_value) {
value_item.push(word);
} else {
len_item.insert(int_value, vec![word]);
}
} else {
self.data
.insert(len, HashMap::from([(int_value, vec![word])]));
}
}
Ok(())
}
LenValueMap::new().with_data(map) pub fn from_dict(decoder: &impl SystemDecoder, repo: &impl DictRepository) -> Self {
Self::build(decoder, repo, DEFAULT_DICT_BATCH_SIZE) // is that common approach?
} }
pub fn is_empty(&self) -> bool { pub fn is_empty(&self) -> bool {
self.map.is_empty() self.data.is_empty()
}
fn build(decoder: &impl SystemDecoder, repo: &impl DictRepository, batch_size: usize) -> Self {
todo!()
} }
} }
@ -61,137 +97,118 @@ mod tests {
use crate::core::{entities::*, errors::*}; use crate::core::{entities::*, errors::*};
use std::collections::HashMap; use std::collections::HashMap;
#[test] use mockall::automock;
fn test_no_inpout_then_empty_map() { use mockall::{mock, predicate::*};
let repo = MockRepository::new().with_single_word_dict();
let encoder = MockEncoder::new(EncodingResult { const TEST_WORD_1: &str = "test_word_1";
input: "".to_string(), const TEST_WORD_2: &str = "test_word_2";
output: vec![], const TEST_WORD_3: &str = "test_word_3";
}); const TEST_WORD_4: &str = "test_word_4";
let lv_map = LenValueMap::build(&encoder, &repo); const TEST_NUM_1: u64 = 12;
assert!(lv_map.is_empty()); const TEST_NUM_2: u64 = 34;
const TEST_NUM_3: u64 = 9876;
const TEST_NUM_1_LEN: u8 = 2;
const TEST_NUM_3_LEN: u8 = 4;
fn get_test_dec_map() -> HashMap<String, DecodedValue> {
HashMap::from([
(TEST_WORD_1.to_string(), TEST_NUM_1.to_string()),
(TEST_WORD_2.to_string(), TEST_NUM_2.to_string()),
(TEST_WORD_3.to_string(), TEST_NUM_3.to_string()),
(TEST_WORD_4.to_string(), TEST_NUM_3.to_string()),
])
} }
// #[test] fn mock_decoding(word: &str) -> DecodedValue {
// fn test_empty_dict_then_no_output() { get_test_dec_map().remove(word).unwrap()
// let dict = Dict {
// name: "test".to_string(),
// entries: HashMap::new(),
// };
// let repo = MockRepository::new(dict);
// let encoder = Encoder::new(&repo);
// let result = encoder.encode("test").unwrap();
// assert!(result.output.is_empty());
// }
// #[test]
// fn test_encode_single_digit() {
// let dict = get_single_word_dict();
// let repo = MockRepository::new(dict);
// let encoder = Encoder::new(&repo).with_batch_size(1);
// let result = encoder.encode("test").unwrap();
// assert!(result.output.is_empty());
// }
// ---------- Helpers ----------
fn get_single_word_dict() -> Dict {
Dict {
name: "test_dict".to_string(),
entries: HashMap::from([(
1,
DictEntry {
id: Some(1),
text: "test_word_1".to_string(),
metadata: HashMap::new(),
},
)]),
}
} }
// ---------- Mocks ---------- fn get_test_words() -> Vec<String> {
vec![
struct MockEncoder { TEST_WORD_1.to_string(),
result: EncodingResult, TEST_WORD_2.to_string(),
TEST_WORD_3.to_string(),
TEST_WORD_4.to_string(),
]
} }
impl MockEncoder { mock! {
fn new(result: EncodingResult) -> Self { pub Decoder {}
MockEncoder { result } impl SystemDecoder for Decoder {
fn decode(&self, word: &str) -> DecodedValue;
} }
} }
impl SystemEncoder for MockEncoder { #[test]
fn initialize(&self) -> Result<(), EncoderError> { fn test_insert_words_empty() {
Ok(()) let words = vec![];
} let mut decoder = MockDecoder::new();
decoder.expect_decode().returning(|_| DecodedValue::new());
fn encode(&self, _word: &str) -> Result<EncodingResult, EncoderError> {
Ok(self.result.clone())
}
}
struct MockRepository { let mut lv_map = LenValueMap::new();
dict: Dict, lv_map.insert_words(words, &decoder).unwrap();
assert!(lv_map.is_empty());
} }
impl MockRepository { #[test]
pub fn new() -> Self { fn test_single_word() {
MockRepository { let words = vec![TEST_WORD_1.to_string()];
dict: Dict::new("test_dict".to_string()), let mut decoder = MockDecoder::new();
} decoder
} .expect_decode()
.returning(|word| mock_decoding(word));
pub fn with_single_word_dict(mut self) -> Self {
self.dict = get_single_word_dict(); let mut lv_map = LenValueMap::new();
self lv_map.insert_words(words, &decoder).unwrap();
}
let data = lv_map.into_data();
assert_eq!(data.len(), 1);
assert!(data.contains_key(&TEST_NUM_1_LEN));
let data = data.get(&TEST_NUM_1_LEN).unwrap();
assert!(data.contains_key(&TEST_NUM_1));
let words = data.get(&TEST_NUM_1).unwrap();
assert_eq!(words.len(), 1);
assert_eq!(words[0], TEST_WORD_1);
} }
#[async_trait::async_trait] #[test]
impl DictRepository for MockRepository { fn test_multiple_words() {
async fn create_dict(&self, _name: &str) -> Result<(), RepositoryError> { let words = get_test_words();
Ok(())
} let mut decoder = MockDecoder::new();
decoder
async fn save_entries( .expect_decode()
&self, .returning(|word| mock_decoding(word));
_dict_name: &str,
_entries: &[DictEntry], let mut lv_map = LenValueMap::new();
) -> Result<(), RepositoryError> { lv_map.insert_words(words, &decoder).unwrap();
todo!()
} let data = lv_map.into_data();
async fn fetch_many( assert_eq!(data.len(), 2); // two different lengths
&self, assert!(data.contains_key(&TEST_NUM_1_LEN));
_name: &str, assert!(data.contains_key(&TEST_NUM_3_LEN));
limit: Option<u32>, let l2 = data.get(&TEST_NUM_1_LEN).unwrap();
offset: Option<u32>, let l4 = data.get(&TEST_NUM_3_LEN).unwrap();
) -> Result<Dict, RepositoryError> {
let offset = offset.unwrap_or(0) as usize; assert_eq!(l2.len(), 2); // two numbers
let limit = limit.unwrap_or(u32::MAX) as usize; assert_eq!(l4.len(), 1); // one number
assert!(l2.contains_key(&TEST_NUM_1));
let mut entries_vec: Vec<_> = self.dict.entries.iter().collect(); assert!(l2.contains_key(&TEST_NUM_2));
entries_vec.sort_by_key(|&(id, _)| *id); assert!(l4.contains_key(&TEST_NUM_3));
let paginated = entries_vec.into_iter().skip(offset).take(limit); let words = l2.get(&TEST_NUM_1).unwrap();
let paginated_map: HashMap<u64, DictEntry> = paginated assert_eq!(words.len(), 1);
.map(|(id, entry)| { assert_eq!(words[0], TEST_WORD_1);
(
*id, let words = l2.get(&TEST_NUM_2).unwrap();
DictEntry { assert_eq!(words.len(), 1);
id: entry.id, assert_eq!(words[0], TEST_WORD_2);
text: entry.text.clone(),
metadata: entry.metadata.clone(), let words = l4.get(&TEST_NUM_3).unwrap();
}, assert_eq!(words.len(), 2);
) assert_eq!(words[0], TEST_WORD_3);
}) assert_eq!(words[1], TEST_WORD_4);
.collect();
Ok(Dict {
name: self.dict.name.clone(),
entries: paginated_map,
})
}
} }
} }

2
lib/src/core/traits.rs

@ -8,7 +8,7 @@ use super::errors::RepositoryError;
/// but decoded as one number. For partial values, we can use /// but decoded as one number. For partial values, we can use
/// u64, but for the whole decoded value that may be very long, /// u64, but for the whole decoded value that may be very long,
/// we need a string. /// we need a string.
type DecodedValue = String; pub type DecodedValue = String;
pub trait SystemDecoder { pub trait SystemDecoder {
fn decode(&self, word: &str) -> DecodedValue; fn decode(&self, word: &str) -> DecodedValue;

Loading…
Cancel
Save