Browse Source

WIP: refactor

develop-refactor
chodak166 4 months ago
parent
commit
239e26b08c
  1. 15
      app/src/commands/decode.rs
  2. 17
      app/src/commands/encode.rs
  3. 9
      app/src/commands/import_dict.rs
  4. 23
      app/src/commands/server.rs
  5. 56
      app/src/config.rs
  6. 4
      app/src/container.rs
  7. 8
      lib/migrations/001_initial.sql
  8. 6
      lib/migrations/002_add_updated_at.sql
  9. 7
      lib/src/common.rs
  10. 0
      lib/src/common/entities.rs
  11. 0
      lib/src/common/errors.rs
  12. 41
      lib/src/common/traits.rs
  13. 6
      lib/src/dictionary.rs
  14. 59
      lib/src/dictionary/dict_importer.rs
  15. 2
      lib/src/dictionary/infrastructure.rs
  16. 85
      lib/src/dictionary/infrastructure/json_file_dict_source.rs
  17. 231
      lib/src/dictionary/infrastructure/sqlite_dict_repository.rs
  18. 28
      lib/src/lib.rs
  19. 9
      lib/src/presentation/cli.rs
  20. 20
      lib/src/presentation/server.rs
  21. 12
      lib/src/sys_major.rs
  22. 122
      lib/src/sys_major/decoder.rs
  23. 134
      lib/src/sys_major/decoder_tests.rs
  24. 179
      lib/src/sys_major/encoder.rs
  25. 351
      lib/src/sys_major/lvmap.rs
  26. 0
      lib/src/sys_major/removeme.old/application.rs
  27. 0
      lib/src/sys_major/removeme.old/application/config.rs
  28. 0
      lib/src/sys_major/removeme.old/application/errors.rs
  29. 0
      lib/src/sys_major/removeme.old/application/services.rs
  30. 0
      lib/src/sys_major/removeme.old/application/traits.rs
  31. 0
      lib/src/sys_major/removeme.old/core.rs
  32. 146
      lib/src/sys_major/removeme.old/core/entities.rs
  33. 31
      lib/src/sys_major/removeme.old/core/errors.rs
  34. 0
      lib/src/sys_major/removeme.old/core/sys_major.rs
  35. 0
      lib/src/sys_major/removeme.old/core/sys_major/decoder.rs
  36. 0
      lib/src/sys_major/removeme.old/core/sys_major/decoder_tests.rs
  37. 0
      lib/src/sys_major/removeme.old/core/sys_major/encoder.rs
  38. 0
      lib/src/sys_major/removeme.old/core/sys_major/lvmap.rs
  39. 0
      lib/src/sys_major/removeme.old/core/sys_major/rules_en.rs
  40. 0
      lib/src/sys_major/removeme.old/core/sys_major/rules_pl.rs
  41. 0
      lib/src/sys_major/removeme.old/core/system.rs
  42. 0
      lib/src/sys_major/removeme.old/core/traits.rs
  43. 0
      lib/src/sys_major/removeme.old/infrastructure.rs
  44. 0
      lib/src/sys_major/removeme.old/infrastructure/errors.rs
  45. 0
      lib/src/sys_major/removeme.old/infrastructure/json_file_dict_source.rs
  46. 0
      lib/src/sys_major/removeme.old/infrastructure/sqlite_dict_repository.rs
  47. 0
      lib/src/sys_major/removeme.old/presentation.rs
  48. 15
      lib/src/sys_major/rules_en.rs
  49. 222
      lib/src/sys_major/rules_pl.rs

15
app/src/commands/decode.rs

@ -1,15 +1,16 @@
use crate::commands::{ClapArgs, CommandExecutor, ConfigurableCommand}; use crate::commands::{ClapArgs, CommandExecutor, ConfigurableCommand};
use crate::config::AppConfig; use crate::config::AppConfig;
use crate::config::System;
use crate::container::Container; use crate::container::Container;
use anyhow::Result; use anyhow::Result;
use applib::core::sys_major::decoder::Decoder; use applib::SystemDecoder;
use applib::core::sys_major::{self as major}; use applib::sys_major::decoder::Decoder;
use applib::core::traits::SystemDecoder; use applib::sys_major::{self as major};
use applib::system::System;
use async_trait::async_trait; use async_trait::async_trait;
use config::ConfigBuilder; use config::ConfigBuilder;
use config::builder::DefaultState; use config::builder::DefaultState;
use serde::Deserialize;
mod defaults { mod defaults {
use const_format::formatcp; use const_format::formatcp;
@ -18,6 +19,12 @@ mod defaults {
pub const HELP_DEC_INPUT: &str = formatcp!("Text to decode"); pub const HELP_DEC_INPUT: &str = formatcp!("Text to decode");
} }
#[derive(Debug, Deserialize, Clone)]
pub struct Config {
pub system: System,
pub input: String,
}
#[derive(ClapArgs, Debug, Clone)] #[derive(ClapArgs, Debug, Clone)]
pub struct DecodeArgs { pub struct DecodeArgs {
#[arg(long, help = defaults::HELP_DEC_SYSTEM)] #[arg(long, help = defaults::HELP_DEC_SYSTEM)]

17
app/src/commands/encode.rs

@ -1,11 +1,12 @@
use applib::core::sys_major::encoder::Encoder; use applib::SystemEncoder;
use applib::core::sys_major::{self as major, LenValueMap}; use applib::sys_major::encoder::Encoder;
use applib::core::traits::SystemEncoder; use applib::sys_major::{self as major, LenValueMap};
use applib::system::System;
use serde::Deserialize;
use tracing::debug; use tracing::debug;
use crate::commands::{ClapArgs, CommandExecutor, ConfigurableCommand}; use crate::commands::{ClapArgs, CommandExecutor, ConfigurableCommand};
use crate::config::AppConfig; use crate::config::{AppConfig, System};
use crate::container::Container; use crate::container::Container;
use anyhow::Result; use anyhow::Result;
@ -20,6 +21,12 @@ mod defaults {
pub const HELP_ENC_INPUT: &str = formatcp!("Number to encode"); pub const HELP_ENC_INPUT: &str = formatcp!("Number to encode");
} }
#[derive(Debug, Deserialize, Clone)]
pub struct Config {
pub system: System,
pub input: String,
}
#[derive(ClapArgs, Debug, Clone)] #[derive(ClapArgs, Debug, Clone)]
pub struct EncodeArgs { pub struct EncodeArgs {
#[arg(long, help = defaults::HELP_ENC_SYSTEM)] #[arg(long, help = defaults::HELP_ENC_SYSTEM)]

9
app/src/commands/import_dict.rs

@ -6,6 +6,13 @@ use anyhow::Result;
use async_trait::async_trait; use async_trait::async_trait;
use config::ConfigBuilder; use config::ConfigBuilder;
use config::builder::DefaultState; use config::builder::DefaultState;
use serde::Deserialize;
#[derive(Debug, Deserialize, Clone)]
pub struct Config {
pub name: String,
pub path: String,
}
#[derive(ClapArgs, Debug, Clone)] #[derive(ClapArgs, Debug, Clone)]
pub struct ImportDictArgs { pub struct ImportDictArgs {
@ -49,7 +56,7 @@ impl CommandExecutor for ImportDictArgs {
// Importer expects an impl DictSource // Importer expects an impl DictSource
// We need to create a DictSource from the path // We need to create a DictSource from the path
use applib::infrastructure::json_file_dict_source::JsonFileDictSource; use applib::JsonFileDictSource;
let source = JsonFileDictSource::new(&config.path)?; let source = JsonFileDictSource::new(&config.path)?;
importer.import(source).await?; importer.import(source).await?;
Ok(()) Ok(())

23
app/src/commands/server.rs

@ -7,9 +7,15 @@ use anyhow::Result;
use async_trait::async_trait; use async_trait::async_trait;
use config::ConfigBuilder; use config::ConfigBuilder;
use config::builder::DefaultState; use config::builder::DefaultState;
use serde::Deserialize;
use tokio::signal; use tokio::signal;
use tracing::{info, warn}; use tracing::{info, warn};
#[derive(Debug, Deserialize, Clone)]
pub struct Config {
pub port: u16,
}
#[derive(ClapArgs, Debug, Clone)] #[derive(ClapArgs, Debug, Clone)]
pub struct ServerArgs { pub struct ServerArgs {
#[arg(short, long, help = defaults::HELP_PORT)] #[arg(short, long, help = defaults::HELP_PORT)]
@ -43,7 +49,15 @@ impl ConfigurableCommand for ServerArgs {
impl CommandExecutor for ServerArgs { impl CommandExecutor for ServerArgs {
async fn execute(&self, config: &AppConfig, _container: &Container) -> Result<()> { async fn execute(&self, config: &AppConfig, _container: &Container) -> Result<()> {
let config = config.server.as_ref().expect("Server config not set"); let config = config.server.as_ref().expect("Server config not set");
applib::presentation::server::run(config.clone(), wait_for_shutdown_signal()).await;
info!("Running server with config: {:#?}", config);
tokio::select! {
_ = server_loop() => {},
_ = wait_for_shutdown_signal() => {
info!("Shutting down server...");
}
}
Ok(()) Ok(())
} }
} }
@ -58,3 +72,10 @@ async fn wait_for_shutdown_signal() {
} }
} }
} }
async fn server_loop() {
loop {
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
info!("Health check... ");
}
}

56
app/src/config.rs

@ -1,19 +1,19 @@
use crate::commands::{ConfigurableCommand, GlobalArgs}; use crate::commands::*;
// use crate::commands::{ConfigurableCommand, GlobalArgs};
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use applib::application::config::*;
use config::{Config, Environment, File}; use config::{Config, Environment, File};
use serde::Deserialize; use serde::Deserialize;
#[derive(Debug, Deserialize, Clone)] #[derive(Debug, Deserialize, Clone)]
pub struct AppConfig { pub struct AppConfig {
#[serde(default)] #[serde(default)]
pub server: Option<ServerConfig>, pub server: Option<server::Config>,
#[serde(default)] #[serde(default)]
pub decoder: Option<DecoderConfig>, pub decoder: Option<decode::Config>,
#[serde(default)] #[serde(default)]
pub encoder: Option<EncoderConfig>, pub encoder: Option<encode::Config>,
#[serde(default)] #[serde(default)]
pub import_dict: Option<ImportDictConfig>, pub import_dict: Option<import_dict::Config>,
pub log_level: String, pub log_level: String,
} }
@ -48,3 +48,47 @@ impl AppConfig {
.context("Failed to deserialize Config") .context("Failed to deserialize Config")
} }
} }
// TODO: move?
use applib::sys_major::{self as major, LenValueMap};
use applib::{DictRepository, SystemDecoder, SystemEncoder};
/// Mnemonic system selector, deserialized from configuration as
/// `"major_en"` or `"major_pl"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
pub enum System {
    #[serde(rename = "major_en")]
    MajorEn,
    #[serde(rename = "major_pl")]
    MajorPl,
}
/// Parses the CLI/config string form of a system name.
///
/// # Panics
/// Panics on any string other than `"major_en"` / `"major_pl"`.
/// NOTE(review): `TryFrom` would let callers recover from bad input.
impl From<&str> for System {
    fn from(s: &str) -> Self {
        match s {
            "major_en" => System::MajorEn,
            "major_pl" => System::MajorPl,
            _ => panic!("Unknown system: {}", s),
        }
    }
}
/// Builds the rule-based decoder for the selected mnemonic system.
pub fn create_decoder(system: &System) -> Box<dyn SystemDecoder> {
    // Only the rule set differs between systems.
    let rules = match system {
        System::MajorPl => major::rules_pl::get_rules(),
        System::MajorEn => major::rules_en::get_rules(),
    };
    Box::new(major::Decoder::new(rules))
}
/// Builds an encoder for `system`, deriving its length/value word map from
/// the dictionary repository.
///
/// # Panics
/// Panics if the dictionary stream cannot be opened or consumed — encoding
/// cannot proceed without the word map.
pub async fn create_encoder(system: &System, dict: &dyn DictRepository) -> Box<dyn SystemEncoder> {
    // The decoder is only needed transiently, to derive digit values for
    // the dictionary words.
    let decoder = major::Decoder::new(match system {
        System::MajorPl => major::rules_pl::get_rules(),
        System::MajorEn => major::rules_en::get_rules(),
    });
    // `expect` with a message instead of the previous bare `unwrap() // TODO`.
    let stream = dict
        .stream_batches(100)
        .await
        .expect("failed to open dictionary batch stream");
    let lvmap = LenValueMap::from_stream(stream, &decoder)
        .await
        .expect("failed to build length/value map from dictionary");
    // Both systems share the same encoder type; only the rules above differ,
    // so the previous per-system match with identical arms is collapsed.
    Box::new(major::Encoder::new(lvmap))
}

4
app/src/container.rs

@ -2,8 +2,8 @@ use std::sync::Arc;
// use crate::config::AppConfig; // use crate::config::AppConfig;
use applib::DictImporter; use applib::DictImporter;
use applib::infrastructure::sqlite_dict_repository::SqliteDictRepository; use applib::DictRepository;
use applib::traits::DictRepository; use applib::SqliteDictRepository;
#[derive(Clone)] #[derive(Clone)]
pub struct Container; pub struct Container;

8
lib/migrations/001_initial.sql

@ -1,8 +0,0 @@
-- Create dicts table
CREATE TABLE IF NOT EXISTS dicts (
name TEXT PRIMARY KEY,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- Note: Individual dict entry tables will be created dynamically when dicts are created
-- This is because each dict needs its own table with the dict name in the table name

6
lib/migrations/002_add_updated_at.sql

@ -1,6 +0,0 @@
-- This migration adds the updated_at column to existing dict entry tables
-- Since dict entry tables are created dynamically, we need to handle this differently
-- For now, we'll drop and recreate existing tables to ensure they have the correct schema
-- Note: In a production environment, you would want to handle this more carefully
-- by checking each existing dict_entries_* table and adding the column if it doesn't exist

7
lib/src/common.rs

@ -0,0 +1,7 @@
// Shared domain layer: entities, error types, and the traits that decouple
// codecs and dictionary storage from their concrete implementations.
pub mod entities;
pub mod errors;
pub mod traits;
// Re-export the core traits at `crate::common` level for convenience.
pub use self::traits::DictRepository;
pub use self::traits::SystemDecoder;
pub use self::traits::SystemEncoder;

0
lib/src/core/entities.rs → lib/src/common/entities.rs

0
lib/src/core/errors.rs → lib/src/common/errors.rs

41
lib/src/common/traits.rs

@ -0,0 +1,41 @@
use futures::stream::BoxStream;
use crate::common::{
entities::{DecodedValue, Dict, DictEntry, EncodedValue},
errors::{CodecError, RepositoryError},
};
/// Decodes a word into its numeric (digit-string) representation.
pub trait SystemDecoder: Send + Sync {
    /// Decodes `word`; implementations are expected to be case-insensitive.
    fn decode(&self, word: &str) -> Result<DecodedValue, CodecError>;
}
/// Encodes a numeric input string into candidate word sequences.
pub trait SystemEncoder: Send + Sync {
    // NOTE(review): the only visible implementation returns Ok(()) here —
    // presumably a hook for encoders needing setup; confirm before relying on it.
    fn initialize(&self) -> Result<(), CodecError>;
    /// Encodes the digit string `word` into an `EncodedValue`.
    fn encode(&self, word: &str) -> Result<EncodedValue, CodecError>;
}
/// Persistent storage for named dictionaries of entries.
///
/// A repository holds a "current" dictionary selected via [`Self::use_dict`];
/// all other operations act on that selection.
#[async_trait::async_trait]
pub trait DictRepository: Send + Sync {
    /// Selects the dictionary that subsequent calls operate on.
    fn use_dict(&mut self, name: &str);
    /// Creates the currently selected dictionary if it does not already exist.
    async fn create_dict(&self) -> Result<(), RepositoryError>;
    /// "Upsert" logic:
    /// - If entry exists (by text), update metadata.
    /// - If not, insert new.
    /// - IDs are handled by the Database.
    async fn save_entries(&self, entries: &[DictEntry]) -> Result<(), RepositoryError>;
    /// Fetch a page of entries.
    async fn fetch_many(&self, limit: usize, offset: usize) -> Result<Dict, RepositoryError>;
    /// Returns a cold stream that fetches strings in chunks.
    /// The stream yields `Result<Vec<String>, RepositoryError>`.
    async fn stream_batches(
        &self,
        batch_size: usize,
    ) -> Result<BoxStream<'_, Result<Vec<String>, RepositoryError>>, RepositoryError>;
}
/// Pull-based source of dictionary entries (e.g. a JSON file).
pub trait DictSource {
    /// Returns the next entry, `Some(Err(_))` for a malformed record,
    /// or `None` once the source is exhausted.
    fn next_entry(&mut self) -> Option<Result<DictEntry, anyhow::Error>>;
}

6
lib/src/dictionary.rs

@ -0,0 +1,6 @@
// Dictionary subsystem: import pipeline plus its storage/source backends.
mod dict_importer;
mod infrastructure;
// Public surface of the subsystem; internals stay private.
pub use self::dict_importer::DictImporter;
pub use self::infrastructure::json_file_dict_source::JsonFileDictSource;
pub use self::infrastructure::sqlite_dict_repository::SqliteDictRepository;

59
lib/src/dictionary/dict_importer.rs

@ -0,0 +1,59 @@
use std::sync::Arc;
use crate::common::traits::{DictRepository, DictSource};
/// Streams entries from a `DictSource` into a `DictRepository` in batches.
pub struct DictImporter {
    repo: Arc<dyn DictRepository>,
    // Number of entries buffered before each repository write.
    batch_size: usize,
}
impl DictImporter {
    /// Creates an importer writing through `repo`, buffering 1000 entries
    /// per repository write by default.
    pub fn new(repo: Arc<dyn DictRepository>) -> Self {
        Self {
            repo,
            batch_size: 1000, // reasonable default
        }
    }

    /// Overrides how many entries are buffered before each write.
    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
        self.batch_size = batch_size;
        self
    }

    /// Drains `source` into the repository, writing in batches.
    ///
    /// The target dictionary is created first if missing. The first
    /// malformed entry aborts the whole import with its error.
    pub async fn import(&self, mut source: impl DictSource) -> Result<(), anyhow::Error> {
        // Ensure the dictionary exists before any entry is written.
        self.repo.create_dict().await?;

        let mut pending = Vec::with_capacity(self.batch_size);
        while let Some(next) = source.next_entry() {
            // `?` aborts on a malformed entry rather than skipping it.
            pending.push(next?);
            // Flush a full batch; `clear` keeps the buffer's capacity.
            if pending.len() >= self.batch_size {
                self.repo.save_entries(&pending).await?;
                pending.clear();
            }
        }

        // Write whatever remains from the last partial batch.
        if !pending.is_empty() {
            self.repo.save_entries(&pending).await?;
        }
        Ok(())
    }
}

2
lib/src/dictionary/infrastructure.rs

@ -0,0 +1,2 @@
// Concrete backends for the dictionary traits defined in `crate::common`.
pub mod json_file_dict_source;
pub mod sqlite_dict_repository;

85
lib/src/dictionary/infrastructure/json_file_dict_source.rs

@ -0,0 +1,85 @@
use crate::common::entities::DictEntry;
use crate::common::traits::DictSource;
use serde::Deserialize;
use std::collections::HashMap;
use std::fs::File;
use std::io::BufReader;
use std::path::Path;
// The "Wire Format".
// It exists ONLY here to map external JSON names to internal Entity names.
#[derive(Deserialize)]
struct JsonEntry {
    // JSON key is `word`; becomes `DictEntry::text`.
    word: String,
    // Arbitrary JSON values; flattened to strings during conversion.
    metadata: Option<HashMap<String, serde_json::Value>>,
}
/// Eagerly-loaded JSON dictionary source; yields parsed entries one at a time.
pub struct JsonFileDictSource {
    entries: Vec<DictEntry>,
    // Cursor into `entries` advanced by `next_entry`.
    current_index: usize,
    // NOTE(review): assigned in the constructors but never read in this file —
    // presumably reserved for appending new entries; confirm before removing.
    next_id: u32,
}
impl JsonFileDictSource {
pub fn new<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
let file = File::open(path)?;
let reader = BufReader::new(file);
// Parse as JSON array
let json_entries: Vec<JsonEntry> = serde_json::from_reader(reader)?;
// Convert to DictEntry with auto-generated IDs
let mut entries = Vec::new();
for (index, json_entry) in json_entries.into_iter().enumerate() {
let id = (index + 1) as u64; // Auto-generate ID starting from 1
// Convert metadata from serde_json::Value to HashMap<String, String>
let metadata = if let Some(meta) = json_entry.metadata {
meta.into_iter()
.map(|(k, v)| {
(
k,
match v {
serde_json::Value::String(s) => s,
_ => v.to_string(),
},
)
})
.collect()
} else {
HashMap::new()
};
entries.push(DictEntry {
id: Some(id),
text: json_entry.word,
metadata,
});
}
let entries_len = entries.len();
Ok(Self {
entries,
current_index: 0,
next_id: (entries_len + 1) as u32,
})
}
pub fn new_with_existing_ids<P: AsRef<Path>>(path: P, start_id: u32) -> anyhow::Result<Self> {
let mut source = Self::new(path)?;
source.next_id = start_id;
Ok(source)
}
}
impl DictSource for JsonFileDictSource {
    /// Yields the next preloaded entry, or `None` once all are consumed.
    /// Entries were validated at load time, so this never yields `Err`.
    fn next_entry(&mut self) -> Option<Result<DictEntry, anyhow::Error>> {
        let entry = self.entries.get(self.current_index)?.clone();
        self.current_index += 1;
        Some(Ok(entry))
    }
}

231
lib/src/dictionary/infrastructure/sqlite_dict_repository.rs

@ -0,0 +1,231 @@
use crate::common::entities::{Dict, DictEntry};
use crate::common::errors::RepositoryError;
use crate::common::traits::DictRepository;
use futures::TryStreamExt;
use futures::stream::BoxStream;
use sqlx::{Row, SqlitePool, sqlite::SqliteConnectOptions};
use std::collections::HashMap;
use std::str::FromStr;
// Row-level DTO for the `entries` table; never leaves this module.
#[derive(sqlx::FromRow)]
struct SqliteEntryDto {
    id: i64,
    text: String,
    // sqlx reads the DB column into this specific wrapper
    metadata: sqlx::types::Json<HashMap<String, String>>,
}
// Mapper: DTO -> Domain Entity
impl From<SqliteEntryDto> for DictEntry {
fn from(dto: SqliteEntryDto) -> Self {
Self {
id: Some(dto.id as u64),
text: dto.text,
// Unwrap the sqlx wrapper to get the inner HashMap
metadata: dto.metadata.0,
}
}
}
// --- REPOSITORY IMPLEMENTATION ---
/// SQLite-backed `DictRepository`; cheap to clone (the pool is shared).
#[derive(Clone)]
pub struct SqliteDictRepository {
    pool: SqlitePool,
    // Name of the dictionary that all operations currently target.
    dict_name: String,
}
impl SqliteDictRepository {
    /// Opens (creating if missing) the SQLite database at `database_url`
    /// and ensures the `dictionaries`/`entries` schema exists.
    ///
    /// The repository starts out targeting the `"default_dict"` dictionary;
    /// switch with `use_dict`.
    pub async fn new(database_url: &str) -> Result<Self, RepositoryError> {
        // NOTE(review): the underlying sqlx error detail is discarded on
        // these two paths; consider carrying it for easier diagnosis.
        let options = SqliteConnectOptions::from_str(database_url)
            .map_err(|_| RepositoryError::ConnectionFailed)?
            .create_if_missing(true);
        let pool = SqlitePool::connect_with(options)
            .await
            .map_err(|_| RepositoryError::ConnectionFailed)?;
        // Ensure tables exist with proper Normalization and Constraints
        sqlx::query(
            r#"
            CREATE TABLE IF NOT EXISTS dictionaries (
                id INTEGER PRIMARY KEY,
                name TEXT NOT NULL UNIQUE,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP
            );
            CREATE TABLE IF NOT EXISTS entries (
                id INTEGER PRIMARY KEY,
                dictionary_id INTEGER NOT NULL,
                text TEXT NOT NULL,
                metadata TEXT,
                updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY(dictionary_id) REFERENCES dictionaries(id) ON DELETE CASCADE,
                -- This constraint allows us to update existing words instead of duplicating them
                UNIQUE(dictionary_id, text)
            );
            "#,
        )
        .execute(&pool)
        .await
        .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
        Ok(Self {
            // Field-init shorthand (clippy `redundant_field_names`; was `pool: pool`).
            pool,
            dict_name: "default_dict".into(),
        })
    }

    /// Resolves the currently selected dictionary name to its row id.
    async fn get_dict_id(&self) -> Result<i64, RepositoryError> {
        let row = sqlx::query("SELECT id FROM dictionaries WHERE name = ?")
            .bind(&self.dict_name)
            .fetch_optional(&self.pool)
            .await
            .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
        match row {
            Some(r) => Ok(r.get("id")),
            None => Err(RepositoryError::NotFound(self.dict_name.clone())),
        }
    }
}
#[async_trait::async_trait]
impl DictRepository for SqliteDictRepository {
    /// Creates the currently selected dictionary row if it does not exist
    /// (idempotent via `INSERT OR IGNORE`).
    async fn create_dict(&self) -> Result<(), RepositoryError> {
        sqlx::query("INSERT OR IGNORE INTO dictionaries (name) VALUES (?)")
            .bind(&self.dict_name)
            .execute(&self.pool)
            .await
            .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
        Ok(())
    }

    /// Points all subsequent operations at the named dictionary.
    /// Does NOT verify that the dictionary exists.
    fn use_dict(&mut self, name: &str) {
        self.dict_name = name.to_string();
    }

    /// Upserts `entries` into the selected dictionary inside one transaction:
    /// either every entry lands or none do.
    async fn save_entries(&self, entries: &[DictEntry]) -> Result<(), RepositoryError> {
        let mut tx = self
            .pool
            .begin()
            .await
            .map_err(|_| RepositoryError::ConnectionFailed)?;
        // 1. Get Dict ID
        let dict_id_row = sqlx::query("SELECT id FROM dictionaries WHERE name = ?")
            .bind(&self.dict_name)
            .fetch_optional(&mut *tx)
            .await
            .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
        let dict_id: i64 = match dict_id_row {
            Some(row) => row.get("id"),
            None => return Err(RepositoryError::NotFound(self.dict_name.clone())),
        };
        // 2. Batch Upsert (one statement per entry, all inside the transaction)
        for entry in entries {
            // We must wrap the HashMap in sqlx::types::Json so SQLx knows how to serialize it
            let meta_json = sqlx::types::Json(&entry.metadata);
            sqlx::query(
                r#"
                INSERT INTO entries (dictionary_id, text, metadata)
                VALUES (?, ?, ?)
                ON CONFLICT(dictionary_id, text) DO UPDATE SET
                    metadata = excluded.metadata,
                    updated_at = CURRENT_TIMESTAMP
                "#,
            )
            .bind(dict_id)
            .bind(&entry.text)
            .bind(meta_json)
            .execute(&mut *tx)
            .await
            .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
        }
        tx.commit()
            .await
            .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
        Ok(())
    }

    /// Fetches a page of entries from the selected dictionary, keyed by id.
    // NOTE(review): no ORDER BY — SQLite row order without it is unspecified,
    // so pagination across calls may not be stable; confirm intent.
    async fn fetch_many(&self, limit: usize, offset: usize) -> Result<Dict, RepositoryError> {
        // Get Dict ID
        let dict_id = self.get_dict_id().await?;
        // Query (Reading into the DTO)
        let dtos = sqlx::query_as::<_, SqliteEntryDto>(
            r#"
            SELECT id, text, metadata
            FROM entries
            WHERE dictionary_id = ?
            LIMIT ? OFFSET ?
            "#,
        )
        .bind(dict_id)
        .bind(limit as u32)
        .bind(offset as u32)
        .fetch_all(&self.pool)
        .await
        .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
        // 4. Convert DTOs to Domain Dict
        let mut entries_map = HashMap::new();
        for dto in dtos {
            let entry: DictEntry = dto.into(); // Converts DTO -> Entity
            // We safely unwrap because the DB guarantees an ID exists
            if let Some(id) = entry.id {
                entries_map.insert(id, entry);
            }
        }
        Ok(Dict {
            name: self.dict_name.clone(),
            entries: entries_map,
        })
    }

    /// Streams the `text` column of every entry in the selected dictionary,
    /// grouped into `batch_size`-sized chunks, without LIMIT/OFFSET paging.
    async fn stream_batches(
        &self,
        batch_size: usize,
    ) -> Result<BoxStream<'_, Result<Vec<String>, RepositoryError>>, RepositoryError> {
        // 1. Resolve ID first
        let dict_id = self.get_dict_id().await?;
        // 2. Create the base query stream.
        // We do NOT use limit/offset. We let the DB stream rows via a cursor.
        let query_stream = sqlx::query("SELECT text FROM entries WHERE dictionary_id = ?")
            .bind(dict_id)
            .fetch(&self.pool);
        // 3. Transform the stream using Functional combinators
        let stream = query_stream
            // Map SQLx errors to Domain errors
            .map_err(|e| RepositoryError::StorageError(e.to_string()))
            // Extract the String from the Row
            .and_then(|row| async move {
                // 'text' is the column name
                let text: String = row
                    .try_get("text")
                    .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
                Ok(text)
            })
            // Group items into vectors of size `batch_size`
            .try_chunks(batch_size)
            // try_chunks returns a specific error type on failure, map it back
            .map_err(|e| {
                // logic to handle leftover elements if error occurs,
                // but for simplicity, we treat stream errors as fatal here
                RepositoryError::StorageError(e.to_string())
            });
        // 4. Box the stream to erase the complex iterator type (Type Erasure)
        Ok(Box::pin(stream))
    }
}

28
lib/src/lib.rs

@ -1,10 +1,20 @@
pub mod application; // pub mod application;
pub mod core; // pub mod core;
pub mod infrastructure; // pub mod infrastructure;
pub mod presentation; // pub mod presentation;
pub use self::application::config; // pub use self::application::config;
pub use self::application::services::DictImporter; // pub use self::application::services::DictImporter;
pub use self::core::system; // pub use self::core::system;
pub use self::core::traits; // pub use self::core::traits;
// pub use self::presentation::cli; // Removed as we are deleting it
// Internal layers: shared domain types and the dictionary subsystem.
mod common;
mod dictionary;
pub mod sys_major;
// Crate facade: flat re-exports of the primary traits and implementations.
pub use self::common::DictRepository;
pub use self::common::SystemDecoder;
pub use self::common::SystemEncoder;
pub use self::dictionary::DictImporter;
pub use self::dictionary::JsonFileDictSource;
pub use self::dictionary::SqliteDictRepository;

9
lib/src/presentation/cli.rs

@ -1,9 +0,0 @@
pub mod cli_args;
pub mod commands;
pub mod defaults;
pub mod traits;
pub use self::cli_args::{
CliArgs, Command, DecodeArgs, EncodeArgs, GlobalArgs, ImportDictArgs, ServerArgs,
};
pub use self::traits::ConfigurableCommand;

20
lib/src/presentation/server.rs

@ -1,20 +0,0 @@
use crate::application::config::ServerConfig;
use tracing::info;
pub async fn run(config: ServerConfig, blocker: impl std::future::Future<Output = ()>) {
info!("Running server with config: {:#?}", config);
tokio::select! {
_ = server_loop() => {},
_ = blocker => {
info!("Shutting down server...");
}
}
}
async fn server_loop() {
loop {
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
info!("Health check... ");
}
}

12
lib/src/sys_major.rs

@ -0,0 +1,12 @@
// Major-system codec: decoding rules, the decoder/encoder pair, and the
// length/value word map that backs encoding.
pub mod decoder;
pub mod encoder;
pub mod lvmap;
pub mod rules_en; // TODO: pub?
pub mod rules_pl; // TODO: pub?
// Flat re-exports of the module's main types.
pub use self::decoder::Decoder;
pub use self::encoder::Encoder;
pub use self::lvmap::LenValueMap; // TODO: pub?
#[cfg(test)]
mod decoder_tests;

122
lib/src/sys_major/decoder.rs

@ -0,0 +1,122 @@
use crate::common::{entities::DecodedValue, errors::CodecError, traits::SystemDecoder};
/// One phoneme-to-digit rule with optional context constraints.
/// `not_*` lists are always-forbidden contexts; non-empty `only_*` lists
/// require at least one of their members to match.
#[derive(Debug, Default, Clone)]
pub struct Rule {
    /// Letter sequence to match in the input word.
    pub phoneme_in: String,
    /// Digit string emitted when the rule matches.
    pub phoneme_out: String,
    /// Suffixes that must NOT immediately follow the match.
    pub not_before: Vec<String>,
    /// Prefixes that must NOT immediately precede the match.
    pub not_after: Vec<String>,
    /// When non-empty, one of these must immediately follow the match.
    pub only_before: Vec<String>,
    /// When non-empty, one of these must immediately precede the match.
    pub only_after: Vec<String>,
}
impl Rule {
    /// Consumes the rule and returns it with every string lowercased,
    /// so matching can run case-insensitively.
    pub fn into_lowercase(self) -> Self {
        let Rule {
            phoneme_in,
            phoneme_out,
            not_before,
            not_after,
            only_before,
            only_after,
        } = self;
        Rule {
            phoneme_in: phoneme_in.to_lowercase(),
            phoneme_out: phoneme_out.to_lowercase(),
            not_before: Self::lower_vec(not_before),
            not_after: Self::lower_vec(not_after),
            only_before: Self::lower_vec(only_before),
            only_after: Self::lower_vec(only_after),
        }
    }

    /// Lowercases every string in the vector.
    fn lower_vec(vec: Vec<String>) -> Vec<String> {
        vec.into_iter().map(|item| item.to_lowercase()).collect()
    }
}
/// Ordered collection of decoding rules for one mnemonic system.
pub type Rules = Vec<Rule>;
/// (index, decoded value)
type RuleMatches = Vec<(usize, String)>;
/// Rule-based decoder; rules are normalized to lowercase at construction.
pub struct Decoder {
    rules: Rules,
}
impl Decoder {
pub fn new(rules: Rules) -> Self {
Decoder {
rules: Decoder::to_lower_rules(rules),
}
}
fn to_lower_rules(rules: Rules) -> Rules {
rules
.into_iter()
.map(|entry| entry.into_lowercase())
.collect()
}
fn match_entry(&self, entry: &Rule, word: &str) -> RuleMatches {
word.match_indices(&entry.phoneme_in)
.filter(|(index, _)| self.is_context_matched(&entry, &word, *index))
.map(|(index, _)| (index, entry.phoneme_out.clone()))
.collect()
}
fn is_context_matched(&self, entry: &Rule, word: &str, index: usize) -> bool {
let before_context = &word[..index];
let after_context = &word[index + entry.phoneme_in.len()..];
// dbg!(&before_context);
// dbg!(&after_context);
if entry
.not_after
.iter()
.any(|prefix| before_context.ends_with(prefix))
{
return false;
}
if entry
.not_before
.iter()
.any(|suffix| after_context.starts_with(suffix))
{
return false;
}
if !entry.only_after.is_empty()
&& entry
.only_after
.iter()
.all(|prefix| !before_context.ends_with(prefix))
{
return false;
}
if !entry.only_before.is_empty()
&& entry
.only_before
.iter()
.all(|suffix| !after_context.starts_with(suffix))
{
return false;
}
true
}
}
impl SystemDecoder for Decoder {
    /// Decodes `word` into its digit string by applying every rule and
    /// concatenating matched digits in order of position.
    fn decode(&self, word: &str) -> Result<DecodedValue, CodecError> {
        // Lowercase ONCE, outside the per-rule closure. The previous code
        // called `word.to_lowercase()` inside `flat_map`, allocating a fresh
        // lowercase copy for every rule.
        let word = word.to_lowercase();
        let mut matches: RuleMatches = self
            .rules
            .iter()
            .flat_map(|entry| self.match_entry(entry, &word))
            .collect();
        // Matches from different rules are interleaved by byte position.
        matches.sort_by_key(|&(pos, _)| pos);
        let num_str: String = matches.into_iter().map(|(_, value)| value).collect();
        DecodedValue::new(num_str)
    }
}

134
lib/src/sys_major/decoder_tests.rs

@ -0,0 +1,134 @@
use super::decoder::{Decoder, Rule, Rules};
use crate::common::traits::SystemDecoder;
// Unit tests for `Decoder`: each fixture exercises one combination of the
// `not_*` / `only_*` context constraints.
#[cfg(test)]
mod tests {
    use super::*;
    // One rule with every constraint populated: B -> 2, only between A and C,
    // never after Y or before X.
    fn create_single_rules() -> Rules {
        vec![Rule {
            phoneme_in: "B".to_string(),
            phoneme_out: "2".to_string(),
            not_after: vec!["Y".to_string()],
            not_before: vec!["X".to_string()],
            only_after: vec!["A".to_string()],
            only_before: vec!["C".to_string()],
        }]
    }
    // Unconstrained rule: B -> 2 anywhere.
    fn create_single_rules_min() -> Rules {
        vec![Rule {
            phoneme_in: "B".to_string(),
            phoneme_out: "2".to_string(),
            ..Default::default()
        }]
    }
    // Two rules: constrained two-letter CD -> 2 plus unconstrained MN -> 3.
    fn create_double_rules() -> Rules {
        vec![
            Rule {
                phoneme_in: "CD".to_string(),
                phoneme_out: "2".to_string(),
                not_after: vec!["00".to_string(), "YZ".to_string()],
                not_before: vec!["11".to_string(), "WX".to_string()],
                only_after: vec!["22".to_string(), "AB".to_string()],
                only_before: vec!["33".to_string(), "EF".to_string()],
            },
            Rule {
                phoneme_in: "MN".to_string(),
                phoneme_out: "3".to_string(),
                ..Default::default()
            },
        ]
    }
    #[test]
    fn test_single_symbol_encoding_only_before_only_after_matched() {
        let decoder = Decoder::new(create_single_rules());
        let output = decoder.decode("ABC").unwrap();
        assert_eq!(output, "2")
    }
    #[test]
    fn test_double_symbol_encoding_only_before_only_after_matched() {
        let decoder = Decoder::new(create_double_rules());
        let output = decoder.decode("ABCDEF").unwrap();
        assert_eq!(output, "2")
    }
    #[test]
    fn test_single_symbol_encoding_only_before_not_matched_with_other() {
        let decoder = Decoder::new(create_single_rules());
        let output = decoder.decode("DBC").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_double_symbol_encoding_only_before_not_matched_with_other() {
        let decoder = Decoder::new(create_double_rules());
        let output = decoder.decode("AACDEE").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_case_insensitivity() {
        let decoder = Decoder::new(create_double_rules());
        let output = decoder.decode("abcdef").unwrap();
        assert_eq!(output, "2")
    }
    #[test]
    fn test_single_symbol_encoding_only_before_not_matched_with_empty() {
        let decoder = Decoder::new(create_single_rules());
        let output = decoder.decode("BC").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_single_symbol_encoding_only_before_not_matched_with_not_before() {
        let decoder = Decoder::new(create_single_rules());
        let output = decoder.decode("XBC").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_single_symbol_encoding_only_after_not_matched_with_other() {
        let decoder = Decoder::new(create_single_rules());
        let output = decoder.decode("ABD").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_single_symbol_encoding_only_after_not_matched_with_empty() {
        let decoder = Decoder::new(create_single_rules());
        let output = decoder.decode("AB").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_single_symbol_encoding_only_after_not_matched_with_not_after() {
        let decoder = Decoder::new(create_single_rules());
        let output = decoder.decode("ABY").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_single_symbol_encoding_empty_before_after_matched_with_empty() {
        let decoder = Decoder::new(create_single_rules_min());
        let output = decoder.decode("B").unwrap();
        assert_eq!(output, "2")
    }
    #[test]
    fn test_single_symbol_encoding_empty_before_after_matched_with_others() {
        let decoder = Decoder::new(create_single_rules_min());
        let output = decoder.decode("AXBYC").unwrap();
        assert_eq!(output, "2")
    }
    #[test]
    fn test_encoding_multiple_phonemes() {
        // Mixed-case input with several valid and several rejected matches.
        let decoder = Decoder::new(create_double_rules());
        let output = decoder.decode("VvmNabCd33mn00CD22cdefmn").unwrap();
        assert_eq!(output, "32323")
    }
}

179
lib/src/sys_major/encoder.rs

@ -0,0 +1,179 @@
use crate::common::{
entities::{EncodedPart, EncodedSplit, EncodedValue},
errors::CodecError,
traits::*,
};
use super::lvmap::LenValueMap;
/// Encoder that turns digit strings into word sequences by looking up
/// (length, value) pairs in a prebuilt `LenValueMap`.
#[derive(Debug)]
pub struct Encoder {
    lv_map: LenValueMap,
}
impl Encoder {
pub fn new(lv_map: LenValueMap) -> Self {
Encoder { lv_map }
}
}
impl SystemEncoder for Encoder {
    /// No setup is required; the length/value map is built elsewhere.
    fn initialize(&self) -> Result<(), CodecError> {
        Ok(())
    }

    /// Encodes a digit string by enumerating every way of splitting it into
    /// contiguous parts, keeping only splits where each part has at least one
    /// word in the length/value map.
    ///
    /// Results are sorted by fewest parts first, then by most balanced part
    /// lengths.
    fn encode(&self, input: &str) -> Result<EncodedValue, CodecError> {
        let size = input.chars().count();
        // Each of the `size - 1` gaps between characters is a potential split
        // point, encoded as one bit of the mask. `saturating_sub` keeps an
        // empty input from underflowing `size - 1` (it simply yields no
        // encodings instead of panicking).
        let max_mask: usize = (1usize << size.saturating_sub(1)) - 1;
        let indices: Vec<usize> = input.char_indices().map(|(i, _)| i).collect();
        // `0..=max_mask` produces max_mask + 1 candidates (was off by one).
        let mut results = Vec::with_capacity(max_mask + 1);

        for mask in 0..=max_mask {
            let mut parts: Vec<String> = Vec::new();
            // We go from right to left to start with the longest parts.
            // This must be the BYTE length of the input (a slice end), not the
            // char count — the two only coincide for ASCII input.
            let mut last_split = input.len();
            // Iterate through the mask bits to find where to split
            for i in 0..size.saturating_sub(1) {
                // Check if the i-th bit is set
                if (mask >> i) & 1 == 1 {
                    // The split corresponds to the byte index of the (i+1)-th
                    // character counted from the right
                    let split_idx = indices[indices.len() - i - 1];
                    parts.push(input[split_idx..last_split].to_string());
                    last_split = split_idx;
                }
            }
            // Push the remaining (leftmost) part of the string
            parts.push(input[..last_split].to_string());

            let mut all_matched = true;
            let mut split = EncodedSplit::new();
            parts.reverse(); // restore left-to-right order
            for part in &parts {
                // A part that is not a number, or that has no words for its
                // (length, value) pair, invalidates the whole split.
                let Ok(num_part) = part.parse::<u64>() else {
                    all_matched = false;
                    break;
                };
                let Some(words) = self.lv_map.get(part.len() as u8, num_part) else {
                    all_matched = false;
                    break;
                };
                split.push(EncodedPart {
                    value: num_part,
                    words: words.clone(),
                });
            }
            if all_matched {
                results.push(Partition {
                    value: split,
                    // To find the "most equal" size, we minimize the sum of squared lengths.
                    // (This mathematically minimizes variance without needing floating point math)
                    sum_sq_len: parts.iter().map(|p| p.chars().count().pow(2)).sum(),
                });
            }
        }

        // Sort by:
        // 1. Fewer parts first (1 part, then 2 parts...)
        // 2. Most equal lengths (lower sum of squared lengths is more balanced)
        results.sort_by(|a, b| {
            a.value
                .len()
                .cmp(&b.value.len())
                .then(a.sum_sq_len.cmp(&b.sum_sq_len))
        });
        // Extract just the splits, dropping the sort metric
        let split_results = results.into_iter().map(|p| p.value).collect();
        Ok(EncodedValue::new(split_results))
    }
}
// A helper struct to keep the split variant and its sort metrics together
struct Partition {
    // one fully-matched way of splitting the input
    value: EncodedSplit,
    // sum of squared part lengths; lower means more evenly sized parts
    sum_sq_len: usize,
}
#[cfg(test)]
mod tests {
    use super::*;

    // A value that maps to a single (length, number) bucket must come back
    // as one split with one part, carrying every word in that bucket.
    #[test]
    fn test_encode_as_single_length_result() {
        let mut lvmap = LenValueMap::new();
        lvmap.push(3, 123, "test_123");
        lvmap.push(3, 345, "test_345_1");
        lvmap.push(3, 345, "test_345_2");
        lvmap.push(3, 678, "test_678");
        let encoder = Encoder::new(lvmap);
        let result = encoder.encode("345").unwrap();
        assert_eq!(result.len(), 1); // single split
        assert_eq!(result[0].len(), 1); // single part
        assert_eq!(result[0][0].value, 345);
        assert_eq!(result[0][0].words.len(), 2); // two words
        assert_eq!(result[0][0].words[0], "test_345_1");
        assert_eq!(result[0][0].words[1], "test_345_2");
    }

    // With buckets for every sub-length, all four partitions of "987"
    // must be produced, ordered by part count (fewest first).
    #[test]
    fn test_encode_as_all_lengths() {
        let mut lvmap = LenValueMap::new();
        lvmap.push(1, 0, "test_0");
        lvmap.push(1, 9, "test_9");
        lvmap.push(1, 8, "test_8");
        lvmap.push(1, 7, "test_7");
        lvmap.push(2, 98, "test_98");
        lvmap.push(2, 87, "test_87");
        lvmap.push(3, 987, "test_987");
        lvmap.push(3, 876, "test_876");
        let encoder = Encoder::new(lvmap);
        let result = encoder.encode("987").unwrap();
        assert_eq!(result.len(), 4); // 987, 98|7, 9|87, 9|8|7
        assert_eq!(result[0].len(), 1); // 987
        assert_eq!(result[0][0].words.len(), 1);
        assert_eq!(result[0][0].words[0], "test_987");
        assert_eq!(result[1].len(), 2); // 98|7
        assert_eq!(result[1][0].words.len(), 1);
        assert_eq!(result[1][0].words[0], "test_98");
        assert_eq!(result[1][1].words.len(), 1);
        assert_eq!(result[1][1].words[0], "test_7");
        assert_eq!(result[2].len(), 2); // 9|87
        assert_eq!(result[2][0].words.len(), 1);
        assert_eq!(result[2][0].words[0], "test_9");
        assert_eq!(result[2][1].words.len(), 1);
        assert_eq!(result[2][1].words[0], "test_87");
        assert_eq!(result[3].len(), 3); // 9|8|7
        assert_eq!(result[3][0].words.len(), 1);
        assert_eq!(result[3][0].words[0], "test_9");
        assert_eq!(result[3][1].words.len(), 1);
        assert_eq!(result[3][1].words[0], "test_8");
        assert_eq!(result[3][2].words.len(), 1);
        assert_eq!(result[3][2].words[0], "test_7");
    }
}

351
lib/src/sys_major/lvmap.rs

@ -0,0 +1,351 @@
use crate::common::{
SystemDecoder,
entities::DecodedLength,
errors::{CodecError, RepositoryError},
};
use futures::{Stream, StreamExt};
use std::{collections::HashMap, num::ParseIntError};
use thiserror::Error;
// We store words by encoded number length, then encoded value
// Example:
// root:
// - 3:
// - 750:
// - word: klasa
// - word: gilza
// - 849:
// - word: farba
// - 2:
// - 45:
// - word: oral
/// Errors raised while filling or building a length/value map.
#[derive(Error, Debug)]
pub enum LenValueMapError {
    /// A decoded value could not be parsed into a number.
    #[error("value parsing error: {0}")]
    Parse(#[from] ParseIntError),
    /// Decoding a word failed.
    #[error(transparent)]
    Codec(#[from] CodecError),
    /// The word source (e.g. a streamed repository) failed.
    #[error(transparent)]
    Repository(#[from] RepositoryError),
    /// Any other failure while assembling the encoder data.
    #[error("unable to build encoder data: {0}")]
    Build(String),
}
// The numeric value a single word decodes to (dictionary words fit u64).
type DecodedNumber = u64;
/// Words grouped first by decoded-number length, then by the number itself
/// (see the layout example at the top of this file).
pub type LenValueData = HashMap<DecodedLength, HashMap<DecodedNumber, Vec<String>>>;
/// Lookup structure used by the encoder: (length, value) -> words.
#[derive(Debug, Default, Clone)]
pub struct LenValueMap {
    data: LenValueData,
}
impl LenValueMap {
    /// Creates an empty map.
    pub fn new() -> Self {
        Self::default()
    }

    /// True when no words have been inserted yet.
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Consumes the map, yielding the raw nested storage.
    pub fn into_data(self) -> LenValueData {
        self.data
    }

    /// Registers `word` under the (length, number) bucket; chainable.
    pub fn push(&mut self, len: u8, num: DecodedNumber, word: impl Into<String>) -> &mut Self {
        self.data
            .entry(DecodedLength::from(len))
            // or_default() is the idiomatic shorthand for or_insert_with(HashMap::new)
            .or_default()
            .entry(num)
            .or_default()
            .push(word.into());
        self
    }

    /// Returns all words stored for the (length, number) pair, if any.
    pub fn get(&self, len: u8, num: DecodedNumber) -> Option<&Vec<String>> {
        self.data.get(&DecodedLength::from(len))?.get(&num)
    }

    /// Decodes each word and files it under its decoded length and value.
    ///
    /// Empty words, and words that decode to an empty value, are skipped.
    ///
    /// # Errors
    /// Propagates the first decode or parse failure; earlier words stay
    /// inserted.
    pub fn insert_words<I>(
        &mut self,
        words: I,
        decoder: &dyn SystemDecoder,
    ) -> Result<(), LenValueMapError>
    where
        I: IntoIterator<Item = String>,
    {
        for word in words {
            if word.is_empty() {
                continue;
            }
            let decoded = decoder.decode(&word)?;
            if decoded.is_empty() {
                continue;
            }
            self.data
                .entry(decoded.value_len()?)
                .or_default()
                .entry(decoded.parse()?)
                .or_default()
                .push(word);
        }
        Ok(())
    }

    /// Wraps pre-built storage without re-validating it.
    pub fn from_data(data: LenValueData) -> Self {
        Self { data }
    }

    /// Builds a map from a stream of word batches.
    ///
    /// Batches are consumed one at a time, so only one batch is in memory
    /// at once.
    ///
    /// # Errors
    /// The first repository or decode error aborts the build; both convert
    /// into `LenValueMapError` via their `#[from]` impls.
    pub async fn from_stream<S>(
        stream: S,
        decoder: &dyn SystemDecoder,
    ) -> Result<Self, LenValueMapError>
    where
        // S is a stream of "Result<Vec<String>, RepositoryError>"
        S: Stream<Item = Result<Vec<String>, RepositoryError>>,
    {
        let mut map = LenValueMap::new();
        let mut stream = Box::pin(stream);
        while let Some(batch_result) = stream.next().await {
            // `?` on the batch converts RepositoryError; insert_words
            // handles the heavy lifting synchronously.
            map.insert_words(batch_result?, decoder)?;
        }
        Ok(map)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::common::{entities::*, errors::*};
    use futures::stream;
    use std::collections::HashMap;
    use mockall::{mock, predicate::*};

    const TEST_WORD_1: &str = "test_word_1";
    const TEST_WORD_2: &str = "test_word_2";
    const TEST_WORD_3: &str = "test_word_3";
    const TEST_WORD_4: &str = "test_word_4";
    const TEST_NUM_1: u64 = 12;
    const TEST_NUM_2: u64 = 34;
    const TEST_NUM_3: u64 = 9876;
    // Lengths of the decoded digit strings above (both 12 and 34 are 2 long).
    const TEST_NUM_1_LEN: DecodedLength = DecodedLength::from(2);
    const TEST_NUM_3_LEN: DecodedLength = DecodedLength::from(4);

    // Wraps a number as the DecodedValue the mock decoder hands back.
    fn decoded_value(n: u64) -> DecodedValue {
        DecodedValue::new(n.to_string()).unwrap()
    }

    // Fixed word -> decoded-value table; WORD_3 and WORD_4 share NUM_3
    // so they land in the same bucket.
    fn get_test_dec_map() -> HashMap<String, DecodedValue> {
        HashMap::from([
            (TEST_WORD_1.to_string(), decoded_value(TEST_NUM_1)),
            (TEST_WORD_2.to_string(), decoded_value(TEST_NUM_2)),
            (TEST_WORD_3.to_string(), decoded_value(TEST_NUM_3)),
            (TEST_WORD_4.to_string(), decoded_value(TEST_NUM_3)),
        ])
    }

    // Lookup used by the mock decoder; unknown words yield a codec error.
    fn mock_decoding(word: &str) -> Result<DecodedValue, CodecError> {
        get_test_dec_map()
            .remove(word)
            .ok_or_else(|| CodecError::UnexpectedError("".to_string()))
    }

    fn get_test_words() -> Vec<String> {
        vec![
            TEST_WORD_1.to_string(),
            TEST_WORD_2.to_string(),
            TEST_WORD_3.to_string(),
            TEST_WORD_4.to_string(),
        ]
    }

    // mockall-generated stand-in for the SystemDecoder trait.
    mock! {
        pub Decoder {}
        impl SystemDecoder for Decoder {
            fn decode(&self, word: &str) -> Result<DecodedValue, CodecError>;
        }
    }

    #[test]
    fn test_single_word() {
        let words = vec![TEST_WORD_1.to_string()];
        let mut decoder = MockDecoder::new();
        decoder
            .expect_decode()
            .returning(|word| mock_decoding(word));
        let mut lv_map = LenValueMap::new();
        lv_map.insert_words(words, &decoder).unwrap();
        let data = lv_map.into_data();
        assert_eq!(data.len(), 1);
        assert!(data.contains_key(&TEST_NUM_1_LEN));
        let data = data.get(&TEST_NUM_1_LEN).unwrap();
        assert!(data.contains_key(&TEST_NUM_1));
        let words = data.get(&TEST_NUM_1).unwrap();
        assert_eq!(words.len(), 1);
        assert_eq!(words[0], TEST_WORD_1);
    }

    #[test]
    fn test_multiple_words() {
        let words = get_test_words();
        let mut decoder = MockDecoder::new();
        decoder
            .expect_decode()
            .returning(|word| mock_decoding(word));
        let mut lv_map = LenValueMap::new();
        lv_map.insert_words(words, &decoder).unwrap();
        let data = lv_map.into_data();
        assert_eq!(data.len(), 2); // two different lengths
        assert!(data.contains_key(&TEST_NUM_1_LEN));
        assert!(data.contains_key(&TEST_NUM_3_LEN));
        let l2 = data.get(&TEST_NUM_1_LEN).unwrap();
        let l4 = data.get(&TEST_NUM_3_LEN).unwrap();
        assert_eq!(l2.len(), 2); // two numbers
        assert_eq!(l4.len(), 1); // one number
        assert!(l2.contains_key(&TEST_NUM_1));
        assert!(l2.contains_key(&TEST_NUM_2));
        assert!(l4.contains_key(&TEST_NUM_3));
        let words = l2.get(&TEST_NUM_1).unwrap();
        assert_eq!(words.len(), 1);
        assert_eq!(words[0], TEST_WORD_1);
        let words = l2.get(&TEST_NUM_2).unwrap();
        assert_eq!(words.len(), 1);
        assert_eq!(words[0], TEST_WORD_2);
        let words = l4.get(&TEST_NUM_3).unwrap();
        assert_eq!(words.len(), 2);
        assert!(words.contains(&TEST_WORD_3.to_string()));
        assert!(words.contains(&TEST_WORD_4.to_string()));
    }

    // Words decoding to an empty value are silently dropped; only WORD_2
    // (decoding to "34", length 2) should be stored.
    #[test]
    fn test_skip_empty_decodes() {
        let words = vec![TEST_WORD_1.to_string(), TEST_WORD_2.to_string()];
        let mut decoder = MockDecoder::new();
        decoder.expect_decode().returning(|word| {
            if word == TEST_WORD_1 {
                DecodedValue::new("".to_string())
            } else {
                DecodedValue::new(TEST_NUM_2.to_string())
            }
        });
        let mut lv_map = LenValueMap::new();
        lv_map.insert_words(words, &decoder).unwrap();
        let data = lv_map.into_data();
        assert_eq!(data.len(), 1);
        assert!(data.contains_key(&TEST_NUM_1_LEN));
        let data = data.get(&TEST_NUM_1_LEN).unwrap();
        assert!(data.contains_key(&TEST_NUM_2));
        let words = data.get(&TEST_NUM_2).unwrap();
        assert_eq!(words.len(), 1);
        assert_eq!(words[0], TEST_WORD_2);
    }

    #[test]
    fn test_decoder_error_propagates() {
        let mut decoder = MockDecoder::new();
        decoder
            .expect_decode()
            .returning(|_| Err(CodecError::UnexpectedError("boom".into())));
        let mut map = LenValueMap::new();
        let result = map.insert_words(vec!["x".into()], &decoder);
        assert!(result.is_err());
    }

    // --- build ---

    #[tokio::test]
    async fn test_from_stream_success() {
        // 1. Setup Mocks (Same as before)
        let mut decoder = MockDecoder::new();
        decoder
            .expect_decode()
            .returning(|word| mock_decoding(word));
        // 2. Prepare Data
        // We wrap the inner Vecs in Ok() because the stream expects Result<Vec<String>, RepositoryError>
        let batches = vec![
            Ok(vec![TEST_WORD_1.into(), TEST_WORD_2.into()]),
            Ok(vec![TEST_WORD_3.into(), TEST_WORD_4.into()]),
        ];
        // 3. Create a Stream from the Vec
        // stream::iter converts an IntoIterator into a Stream
        let stream = stream::iter(batches);
        // 4. Inject the stream (Dependency Injection)
        let map = LenValueMap::from_stream(stream, &decoder)
            .await
            .expect("Should build map successfully");
        // 5. Assertions
        let data = map.into_data();
        assert_eq!(data.len(), 2);
        assert!(data.contains_key(&TEST_NUM_1_LEN));
        assert!(data.contains_key(&TEST_NUM_3_LEN));
    }

    #[tokio::test]
    async fn test_from_stream_failure() {
        let mut decoder = MockDecoder::new();
        decoder
            .expect_decode()
            .returning(|word| mock_decoding(word));
        let batches = vec![
            Ok(vec![TEST_WORD_1.into()]),
            Err(RepositoryError::ConnectionFailed),
            Ok(vec![TEST_WORD_3.into()]),
        ];
        let stream = stream::iter(batches);
        let result = LenValueMap::from_stream(stream, &decoder).await;
        match result {
            // We match specifically on the Repository variant and the ConnectionFailed inner error
            Err(LenValueMapError::Repository(RepositoryError::ConnectionFailed)) => {
                // Success! The correct error type propagated up.
            }
            // If it's any other error (including a stringified one), we fail
            _ => panic!(
                "Expected LenValueMapError::Repository(ConnectionFailed), got {:?}",
                result
            ),
        }
    }
}

0
lib/src/application.rs → lib/src/sys_major/removeme.old/application.rs

0
lib/src/application/config.rs → lib/src/sys_major/removeme.old/application/config.rs

0
lib/src/application/errors.rs → lib/src/sys_major/removeme.old/application/errors.rs

0
lib/src/application/services.rs → lib/src/sys_major/removeme.old/application/services.rs

0
lib/src/application/traits.rs → lib/src/sys_major/removeme.old/application/traits.rs

0
lib/src/core.rs → lib/src/sys_major/removeme.old/core.rs

146
lib/src/sys_major/removeme.old/core/entities.rs

@ -0,0 +1,146 @@
use super::errors::CodecError;
use serde::Serialize;
use std::num::ParseIntError;
use std::ops::Deref;
use std::{collections::HashMap, u64};
/// A number encoded as a sequence of words
#[derive(Debug, Clone, Serialize)]
pub struct EncodedPart {
    // the numeric value this part represents
    pub value: u64,
    // all dictionary words that encode `value`
    pub words: Vec<String>,
}
/// A way (variant) to split input number
pub type EncodedSplit = Vec<EncodedPart>;
/// A number encoded as words, split in multiple ways
#[derive(Debug, Clone, Serialize)]
pub struct EncodedValue(Vec<EncodedSplit>);
impl EncodedValue {
    /// Wraps the list of split variants.
    pub fn new(data: Vec<EncodedSplit>) -> Self {
        EncodedValue(data)
    }
}
// Deref lets callers use Vec/slice methods (len, indexing) directly.
impl Deref for EncodedValue {
    type Target = Vec<EncodedSplit>;
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}
/// The number value can be encoded as many word sets,
/// but decoded as one number. For partial values
/// and dictionary words (reasonable length), we can use
/// u64 (20-digit number), but the whole input text can
/// be longer than 20 digits, so we operate on String (<= 255).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecodedValue(String);

impl DecodedValue {
    /// Wraps a decoded string; rejects values longer than 255 bytes.
    pub fn new(value: String) -> Result<Self, CodecError> {
        if value.len() > u8::MAX as usize {
            Err(CodecError::TextTooLong(value.len()))
        } else {
            Ok(Self(value))
        }
    }

    /// Borrows the decoded digits.
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Parses the decoded digits into a number.
    // NOTE(review): only meaningful for values that fit u64 (<= 20 digits);
    // longer inputs return a ParseIntError.
    pub fn parse(&self) -> Result<u64, ParseIntError> {
        self.0.parse()
    }

    /// Length of the decoded string in bytes.
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// True when nothing was decoded.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Length as a `DecodedLength`; empty values are an error.
    pub fn value_len(&self) -> Result<DecodedLength, CodecError> {
        // idiom: is_empty() instead of comparing len() to zero
        if self.is_empty() {
            return Err(CodecError::EmptyValue);
        }
        DecodedLength::try_from(self.len())
    }
}
// Symmetric str comparisons so tests can write `decoded == "12"` and
// `"12" == decoded`.
impl PartialEq<&str> for DecodedValue {
    fn eq(&self, other: &&str) -> bool {
        &self.0 == *other
    }
}
impl PartialEq<DecodedValue> for &str {
    fn eq(&self, other: &DecodedValue) -> bool {
        *self == &other.0
    }
}
/// Length (digit count) of a decoded value; bounded by u8 per the
/// 255-byte limit on `DecodedValue`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct DecodedLength(u8);
impl DecodedLength {
    /// Infallible constructor from a u8 (const so it works in constants).
    pub const fn from(value: u8) -> Self {
        Self(value)
    }
}
// Fallible narrowing from usize: lengths above 255 are rejected.
impl TryFrom<usize> for DecodedLength {
    type Error = CodecError;
    fn try_from(value: usize) -> Result<Self, CodecError> {
        if value > u8::MAX as usize {
            Err(CodecError::ValueLimitExceeded(value))
        } else {
            Ok(Self(value as u8))
        }
    }
}
// --- Dictionary ---
/// Identifier of a dictionary entry.
pub type DictEntryId = u64;
/// A single dictionary word plus arbitrary key/value metadata.
#[derive(Debug, Clone, PartialEq)]
pub struct DictEntry {
    // presumably None before the entry is persisted — verify against the
    // repository code that assigns ids
    pub id: Option<DictEntryId>,
    pub text: String,
    pub metadata: HashMap<String, String>,
}
impl DictEntry {
    /// Creates an entry with empty metadata.
    pub fn new(id: Option<DictEntryId>, text: String) -> Self {
        DictEntry {
            id,
            text,
            metadata: HashMap::new(),
        }
    }
}
/// A named dictionary of entries indexed by their id.
#[derive(Debug, Clone)]
pub struct Dict {
    pub name: String,
    pub entries: HashMap<DictEntryId, DictEntry>,
}

impl Dict {
    /// Creates an empty dictionary with the given name.
    pub fn new(name: String) -> Self {
        Dict {
            name,
            entries: HashMap::new(),
        }
    }

    /// Inserts an entry, keyed by its id. An entry with the same id is
    /// replaced.
    ///
    /// # Panics
    /// Panics when `entry.id` is `None` — entries must be assigned an id
    /// before they are added to a `Dict`.
    pub fn add_entry(&mut self, entry: DictEntry) {
        // expect() states the invariant instead of a bare unwrap()
        let id = entry
            .id
            .expect("DictEntry must have an id before being added to a Dict");
        self.entries.insert(id, entry);
    }
}

31
lib/src/sys_major/removeme.old/core/errors.rs

@ -0,0 +1,31 @@
use thiserror::Error;
/// Failures surfaced by data-source / persistence layers.
#[derive(Error, Debug)]
pub enum RepositoryError {
    /// The backing store could not be reached.
    #[error("Data source connection failed")]
    ConnectionFailed,
    /// The named resource does not exist.
    #[error("'{0}' not found")]
    NotFound(String),
    /// Any other storage-level failure, with a description.
    #[error("Storage error: {0}")]
    StorageError(String),
}
/// Failures raised by the encode/decode pipeline.
#[derive(Debug, Error)]
pub enum CodecError {
    /// Input text exceeds the supported byte length.
    #[error("text too long: {0} bytes")]
    TextTooLong(usize),
    /// A numeric value exceeds the u8-bounded limit.
    #[error("value too large: {0}/255")]
    ValueLimitExceeded(usize),
    /// The operation needs a non-empty value.
    #[error("operation not allowed on empty value")]
    EmptyValue,
    /// The codec could not be set up.
    #[error("initialization failed")]
    InitializationFailed,
    /// Catch-all with a description.
    #[error("unexpected error: {0}")]
    UnexpectedError(String),
}

0
lib/src/core/sys_major.rs → lib/src/sys_major/removeme.old/core/sys_major.rs

0
lib/src/core/sys_major/decoder.rs → lib/src/sys_major/removeme.old/core/sys_major/decoder.rs

0
lib/src/core/sys_major/decoder_tests.rs → lib/src/sys_major/removeme.old/core/sys_major/decoder_tests.rs

0
lib/src/core/sys_major/encoder.rs → lib/src/sys_major/removeme.old/core/sys_major/encoder.rs

0
lib/src/core/sys_major/lvmap.rs → lib/src/sys_major/removeme.old/core/sys_major/lvmap.rs

0
lib/src/core/sys_major/rules_en.rs → lib/src/sys_major/removeme.old/core/sys_major/rules_en.rs

0
lib/src/core/sys_major/rules_pl.rs → lib/src/sys_major/removeme.old/core/sys_major/rules_pl.rs

0
lib/src/core/system.rs → lib/src/sys_major/removeme.old/core/system.rs

0
lib/src/core/traits.rs → lib/src/sys_major/removeme.old/core/traits.rs

0
lib/src/infrastructure.rs → lib/src/sys_major/removeme.old/infrastructure.rs

0
lib/src/infrastructure/errors.rs → lib/src/sys_major/removeme.old/infrastructure/errors.rs

0
lib/src/infrastructure/json_file_dict_source.rs → lib/src/sys_major/removeme.old/infrastructure/json_file_dict_source.rs

0
lib/src/infrastructure/sqlite_dict_repository.rs → lib/src/sys_major/removeme.old/infrastructure/sqlite_dict_repository.rs

0
lib/src/presentation.rs → lib/src/sys_major/removeme.old/presentation.rs

15
lib/src/sys_major/rules_en.rs

@ -0,0 +1,15 @@
use super::decoder::{Rule, Rules};
/// Returns the English Major-system decoding rules.
pub fn get_rules() -> Rules {
    let en = Rule {
        phoneme_in: String::from("EN"),
        phoneme_out: String::from("2"),
        not_after: vec![String::from("Y")],
        not_before: vec![String::from("X")],
        only_after: vec![String::from("A")],
        only_before: vec![String::from("C")],
    };
    // ...more entries...
    vec![en]
}

222
lib/src/sys_major/rules_pl.rs

@ -0,0 +1,222 @@
use super::decoder::{Rule, Rules};
pub fn get_rules() -> Rules {
vec![
Rule {
not_after: vec![],
only_after: vec![],
phoneme_in: "S".to_string(),
phoneme_out: "0".to_string(),
not_before: vec!["I".to_string(), "Z".to_string()],
only_before: vec![],
},
Rule {
not_after: vec![
"C".to_string(),
"D".to_string(),
"R".to_string(),
"S".to_string(),
],
only_after: vec![],
phoneme_in: "Z".to_string(),
phoneme_out: "0".to_string(),
not_before: vec!["I".to_string()],
only_before: vec![],
},
Rule {
not_after: vec![],
only_after: vec![],
phoneme_in: "T".to_string(),
phoneme_out: "1".to_string(),
not_before: vec![],
only_before: vec![],
},
Rule {
only_after: vec![],
not_after: vec![],
phoneme_in: "D".to_string(),
phoneme_out: "1".to_string(),
not_before: vec!["Z".to_string(), "Ź".to_string(), "Ż".to_string()],
only_before: vec![],
},
Rule {
not_after: vec![],
only_after: vec![],
phoneme_in: "N".to_string(),
phoneme_out: "2".to_string(),
not_before: vec!["I".to_string()],
only_before: vec![],
},
Rule {
not_after: vec![],
only_after: vec![],
phoneme_in: "M".to_string(),
phoneme_out: "3".to_string(),
not_before: vec![],
only_before: vec![],
},
Rule {
not_after: vec![],
only_after: vec![],
phoneme_in: "R".to_string(),
phoneme_out: "4".to_string(),
not_before: vec!["Z".to_string()],
only_before: vec![],
},
Rule {
not_after: vec![],
only_after: vec![],
phoneme_in: "L".to_string(),
phoneme_out: "5".to_string(),
not_before: vec![],
only_before: vec![],
},
Rule {
not_after: vec![],
only_after: vec![],
phoneme_in: "J".to_string(),
phoneme_out: "6".to_string(),
not_before: vec![],
only_before: vec![],
},
Rule {
not_after: vec![],
only_after: vec![],
phoneme_in: "K".to_string(),
phoneme_out: "7".to_string(),
not_before: vec![],
only_before: vec![],
},
Rule {
not_after: vec![],
only_after: vec![],
phoneme_in: "G".to_string(),
phoneme_out: "7".to_string(),
not_before: vec![],
only_before: vec![],
},
Rule {
not_after: vec![],
only_after: vec![],
phoneme_in: "F".to_string(),
phoneme_out: "8".to_string(),
not_before: vec![],
only_before: vec![],
},
Rule {
not_after: vec![],
only_after: vec![],
phoneme_in: "W".to_string(),
phoneme_out: "8".to_string(),
not_before: vec![],
only_before: vec![],
},
Rule {
not_after: vec![],
only_after: vec![],
phoneme_in: "P".to_string(),
phoneme_out: "9".to_string(),
not_before: vec![],
only_before: vec![],
},
Rule {
not_after: vec![],
only_after: vec![],
phoneme_in: "B".to_string(),
phoneme_out: "9".to_string(),
not_before: vec![],
only_before: vec![],
},
]
}
#[cfg(test)]
mod tests {
    use super::*;
    // NOTE(review): the old `crate::core::sys_major` / `crate::traits`
    // paths were moved under removeme.old by this refactor and no longer
    // resolve; import from the new module layout instead (lvmap.rs uses
    // `crate::common::SystemDecoder`, and the decoder lives in
    // lib/src/sys_major/decoder.rs). Adjust if `sys_major` re-exports
    // `Decoder` directly.
    use crate::common::SystemDecoder;
    use crate::sys_major::decoder::Decoder;

    // "0": S not before I/Z; Z not after C/D/R/S and not before I.
    #[test]
    fn test_major_dict_pl_decode_0_1() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("SZSCZ").unwrap();
        assert_eq!(output, "0")
    }
    #[test]
    fn test_major_dict_pl_decode_0_2() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("SZSICZ").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_major_dict_pl_decode_0_3() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("SZCZRZZCZDZSZ").unwrap();
        assert_eq!(output, "0")
    }
    #[test]
    fn test_major_dict_pl_decode_0_4() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("SZCZRZZICZDZSZ").unwrap();
        assert_eq!(output, "")
    }
    // "1": T unconditional; D not before Z/Ź/Ż.
    #[test]
    fn test_major_dict_pl_decode_1_1() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("SZTCZ").unwrap();
        assert_eq!(output, "1")
    }
    #[test]
    fn test_major_dict_pl_decode_1_2() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("DZDŻDŹDDZDŻDŹ").unwrap();
        assert_eq!(output, "1")
    }
    #[test]
    fn test_major_dict_pl_decode_1_3() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("DZDŻDŹDZDZDŻDŹ").unwrap();
        assert_eq!(output, "")
    }
    // "2": N not before I.
    #[test]
    fn test_major_dict_pl_decode_2_1() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("NINNI").unwrap();
        assert_eq!(output, "2")
    }
    #[test]
    fn test_major_dict_pl_decode_2_2() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("NININI").unwrap();
        assert_eq!(output, "")
    }
    // "4": R not before Z.
    #[test]
    fn test_major_dict_pl_decode_4_1() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("RZRRZ").unwrap();
        assert_eq!(output, "4")
    }
    #[test]
    fn test_major_dict_pl_decode_4_2() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("RZRZRZ").unwrap();
        assert_eq!(output, "")
    }
    // Every consonant interleaved with vowels decodes to its digit.
    #[test]
    fn test_major_dict_pl_decode_full_1() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("ATADANAMARALAJAKAGAFAWAPABA").unwrap();
        assert_eq!(output, "1123456778899")
    }
}
Loading…
Cancel
Save