8 changed files with 265 additions and 15 deletions
@ -0,0 +1,23 @@
|
||||
use crate::core::{ |
||||
entities::EncodingResult, errors::EncoderError, sys_major::LenValueMap, traits::*, |
||||
}; |
||||
|
||||
/// System encoder backed by a precomputed length/value lookup table.
#[derive(Debug)]
pub struct Encoder {
    // Words indexed by encoded-number length, then by encoded value
    // (see the LenValueMap module comment for the layout).
    lv_map: LenValueMap,
}
||||
|
||||
impl Encoder { |
||||
pub fn new(lv_map: LenValueMap) -> Self { |
||||
Encoder { lv_map } |
||||
} |
||||
} |
||||
|
||||
impl SystemEncoder for Encoder {
    /// No setup is currently required; the lookup map is built before
    /// construction, so this always succeeds.
    fn initialize(&self) -> Result<(), EncoderError> {
        Ok(())
    }

    /// Encodes `word` into its encoding result.
    ///
    /// NOTE(review): not yet implemented — calling this panics via `todo!()`.
    /// Presumably it will look `word` up through `self.lv_map`; confirm the
    /// intended algorithm before implementing.
    fn encode(&self, word: &str) -> Result<EncodingResult, EncoderError> {
        todo!()
    }
}
||||
@ -0,0 +1,197 @@
|
||||
use std::collections::HashMap; |
||||
|
||||
use crate::core::{DictRepository, SystemEncoder}; |
||||
|
||||
// We store words by encoded number length, then encoded value
|
||||
// Example:
|
||||
// root:
|
||||
// - 3:
|
||||
// - 750:
|
||||
// - word: klasa
|
||||
// - word: gilza
|
||||
// - 849:
|
||||
// - word: farba
|
||||
// - 2:
|
||||
// - 45:
|
||||
// - word: oral
|
||||
|
||||
/// Default number of dictionary entries fetched per repository call when
/// building the map.
const DEFAULT_DICT_BATCH_SIZE: usize = 100;

/// Length of an encoded value (first-level key of the nested map).
type ValueLength = u8;
/// The encoded numeric value itself (second-level key of the nested map).
type Value = u64;
/// Words grouped by encoded-value length, then by encoded value
/// (see the module comment above for an example layout).
pub type LenValueData = HashMap<ValueLength, HashMap<Value, Vec<String>>>;
||||
|
||||
/// Lookup table mapping encoded-value length -> encoded value -> words.
#[derive(Debug)]
pub struct LenValueMap {
    // Nested map: length -> value -> matching words.
    map: LenValueData,
    // How many dictionary entries to fetch per repository call when building.
    batch_size: usize,
}
||||
|
||||
impl LenValueMap { |
||||
pub fn new() -> Self { |
||||
LenValueMap { |
||||
map: LenValueData::new(), |
||||
batch_size: DEFAULT_DICT_BATCH_SIZE, |
||||
} |
||||
} |
||||
pub fn with_data(mut self, data: LenValueData) -> Self { |
||||
self.map = data; |
||||
self |
||||
} |
||||
|
||||
pub fn with_batch_size(mut self, batch_size: usize) -> Self { |
||||
self.batch_size = batch_size; |
||||
self |
||||
} |
||||
|
||||
pub fn build<E: SystemEncoder, R: DictRepository>(encoder: &E, repo: &R) -> Self { |
||||
let mut map = LenValueData::new(); |
||||
|
||||
LenValueMap::new().with_data(map) |
||||
} |
||||
|
||||
pub fn is_empty(&self) -> bool { |
||||
self.map.is_empty() |
||||
} |
||||
} |
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::{entities::*, errors::*};
    use std::collections::HashMap;

    #[test]
    fn test_no_input_then_empty_map() {
        // If the encoder yields an empty output for every word, nothing can
        // be inserted into the map, so the built map must be empty.
        let repo = MockRepository::new().with_single_word_dict();
        let encoder = MockEncoder::new(EncodingResult {
            input: "".to_string(),
            output: vec![],
        });
        let lv_map = LenValueMap::build(&encoder, &repo);
        assert!(lv_map.is_empty());
    }

    // NOTE(review): two commented-out tests ("empty dict -> no output" and
    // "encode single digit") were removed here; they exercised an older
    // `Encoder::new(&repo)` constructor that no longer matches the current
    // API. Reintroduce them against the new API once `LenValueMap::build`
    // is implemented.

    // ---------- Helpers ----------

    /// Smallest non-empty fixture: a dictionary holding a single entry (id 1).
    fn get_single_word_dict() -> Dict {
        Dict {
            name: "test_dict".to_string(),
            entries: HashMap::from([(
                1,
                DictEntry {
                    id: Some(1),
                    text: "test_word_1".to_string(),
                    metadata: HashMap::new(),
                },
            )]),
        }
    }

    // ---------- Mocks ----------

    /// `SystemEncoder` stub returning one pre-canned result for any word.
    struct MockEncoder {
        result: EncodingResult,
    }

    impl MockEncoder {
        fn new(result: EncodingResult) -> Self {
            MockEncoder { result }
        }
    }

    impl SystemEncoder for MockEncoder {
        fn initialize(&self) -> Result<(), EncoderError> {
            Ok(())
        }

        fn encode(&self, _word: &str) -> Result<EncodingResult, EncoderError> {
            // Always returns a clone of the configured result, regardless of input.
            Ok(self.result.clone())
        }
    }

    /// In-memory `DictRepository` over a single dictionary.
    struct MockRepository {
        dict: Dict,
    }

    impl MockRepository {
        pub fn new() -> Self {
            MockRepository {
                dict: Dict::new("test_dict".to_string()),
            }
        }

        /// Builder-style helper: swaps in the one-word fixture dictionary.
        pub fn with_single_word_dict(mut self) -> Self {
            self.dict = get_single_word_dict();
            self
        }
    }

    #[async_trait::async_trait]
    impl DictRepository for MockRepository {
        async fn create_dict(&self, _name: &str) -> Result<(), RepositoryError> {
            Ok(())
        }

        async fn save_entries(
            &self,
            _dict_name: &str,
            _entries: &[DictEntry],
        ) -> Result<(), RepositoryError> {
            // Not exercised by any current test.
            todo!()
        }

        /// Returns the stored dictionary paginated by `limit`/`offset`,
        /// ordered by entry id so pagination is deterministic.
        async fn fetch_many(
            &self,
            _name: &str,
            limit: Option<u32>,
            offset: Option<u32>,
        ) -> Result<Dict, RepositoryError> {
            let offset = offset.unwrap_or(0) as usize;
            let limit = limit.unwrap_or(u32::MAX) as usize;

            // HashMap iteration order is arbitrary; sort by id first so that
            // skip/take pagination is stable across calls. Ids are unique,
            // so unstable sort produces the same order without allocating.
            let mut entries_vec: Vec<_> = self.dict.entries.iter().collect();
            entries_vec.sort_unstable_by_key(|&(id, _)| *id);

            let paginated = entries_vec.into_iter().skip(offset).take(limit);
            let paginated_map: HashMap<u64, DictEntry> = paginated
                .map(|(id, entry)| {
                    (
                        *id,
                        // DictEntry is rebuilt field-by-field — presumably it
                        // does not derive Clone; confirm before simplifying.
                        DictEntry {
                            id: entry.id,
                            text: entry.text.clone(),
                            metadata: entry.metadata.clone(),
                        },
                    )
                })
                .collect();

            Ok(Dict {
                name: self.dict.name.clone(),
                entries: paginated_map,
            })
        }
    }
}
||||
Loading…
Reference in new issue