WIP: refactor

4 months ago · 239e26b08c
49 changed files with 1767 additions and 71 deletions
--- a/app/src/commands/decode.rs
+++ b/app/src/commands/decode.rs
@ -1,15 +1,16 @@
 use crate::commands::{ClapArgs, CommandExecutor, ConfigurableCommand};
 use crate::config::AppConfig;
 use crate::config::System;
 use crate::container::Container;
 use anyhow::Result;
-use applib::core::sys_major::decoder::Decoder;
+use applib::SystemDecoder;
-use applib::core::sys_major::{self as major};
+use applib::sys_major::decoder::Decoder;
-use applib::core::traits::SystemDecoder;
+use applib::sys_major::{self as major};
 use applib::system::System;
 use async_trait::async_trait;
 use config::ConfigBuilder;
 use config::builder::DefaultState;
 use serde::Deserialize;
 mod defaults {
    use const_format::formatcp;
@ -18,6 +19,12 @@ mod defaults {
    pub const HELP_DEC_INPUT: &str = formatcp!("Text to decode");
 }
 #[derive(Debug, Deserialize, Clone)]
 pub struct Config {
    pub system: System,
    pub input: String,
 }
 #[derive(ClapArgs, Debug, Clone)]
 pub struct DecodeArgs {
    #[arg(long, help = defaults::HELP_DEC_SYSTEM)]
--- a/app/src/commands/encode.rs
+++ b/app/src/commands/encode.rs
@ -1,11 +1,12 @@
-use applib::core::sys_major::encoder::Encoder;
+use applib::SystemEncoder;
-use applib::core::sys_major::{self as major, LenValueMap};
+use applib::sys_major::encoder::Encoder;
-use applib::core::traits::SystemEncoder;
+use applib::sys_major::{self as major, LenValueMap};
-use applib::system::System;
+
 use serde::Deserialize;
 use tracing::debug;
 use crate::commands::{ClapArgs, CommandExecutor, ConfigurableCommand};
-use crate::config::AppConfig;
+use crate::config::{AppConfig, System};
 use crate::container::Container;
 use anyhow::Result;
@ -20,6 +21,12 @@ mod defaults {
    pub const HELP_ENC_INPUT: &str = formatcp!("Number to encode");
 }
 #[derive(Debug, Deserialize, Clone)]
 pub struct Config {
    pub system: System,
    pub input: String,
 }
 #[derive(ClapArgs, Debug, Clone)]
 pub struct EncodeArgs {
    #[arg(long, help = defaults::HELP_ENC_SYSTEM)]
--- a/app/src/commands/import_dict.rs
+++ b/app/src/commands/import_dict.rs
@ -6,6 +6,13 @@ use anyhow::Result;
 use async_trait::async_trait;
 use config::ConfigBuilder;
 use config::builder::DefaultState;
 use serde::Deserialize;
 #[derive(Debug, Deserialize, Clone)]
 pub struct Config {
    pub name: String,
    pub path: String,
 }
 #[derive(ClapArgs, Debug, Clone)]
 pub struct ImportDictArgs {
@ -49,7 +56,7 @@ impl CommandExecutor for ImportDictArgs {
        // Importer expects an impl DictSource
        // We need to create a DictSource from the path
-        use applib::infrastructure::json_file_dict_source::JsonFileDictSource;
+        use applib::JsonFileDictSource;
        let source = JsonFileDictSource::new(&config.path)?;
        importer.import(source).await?;
        Ok(())
--- a/app/src/commands/server.rs
+++ b/app/src/commands/server.rs
@ -7,9 +7,15 @@ use anyhow::Result;
 use async_trait::async_trait;
 use config::ConfigBuilder;
 use config::builder::DefaultState;
 use serde::Deserialize;
 use tokio::signal;
 use tracing::{info, warn};
 #[derive(Debug, Deserialize, Clone)]
 pub struct Config {
    pub port: u16,
 }
 #[derive(ClapArgs, Debug, Clone)]
 pub struct ServerArgs {
    #[arg(short, long, help = defaults::HELP_PORT)]
@ -43,7 +49,15 @@ impl ConfigurableCommand for ServerArgs {
 impl CommandExecutor for ServerArgs {
    async fn execute(&self, config: &AppConfig, _container: &Container) -> Result<()> {
        let config = config.server.as_ref().expect("Server config not set");
-        applib::presentation::server::run(config.clone(), wait_for_shutdown_signal()).await;
+
        info!("Running server with config: {:#?}", config);
        tokio::select! {
            _ = server_loop() => {},
            _ = wait_for_shutdown_signal() => {
                info!("Shutting down server...");
            }
        }
        Ok(())
    }
 }
@ -58,3 +72,10 @@ async fn wait_for_shutdown_signal() {
        }
    }
 }
 async fn server_loop() {
    loop {
        tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
        info!("Health check... ");
    }
 }
--- a/app/src/config.rs
+++ b/app/src/config.rs
@ -1,19 +1,19 @@
-use crate::commands::{ConfigurableCommand, GlobalArgs};
+use crate::commands::*;
 // use crate::commands::{ConfigurableCommand, GlobalArgs};
 use anyhow::{Context, Result};
 use applib::application::config::*;
 use config::{Config, Environment, File};
 use serde::Deserialize;
 #[derive(Debug, Deserialize, Clone)]
 pub struct AppConfig {
    #[serde(default)]
-    pub server: Option<ServerConfig>,
+    pub server: Option<server::Config>,
    #[serde(default)]
-    pub decoder: Option<DecoderConfig>,
+    pub decoder: Option<decode::Config>,
    #[serde(default)]
-    pub encoder: Option<EncoderConfig>,
+    pub encoder: Option<encode::Config>,
    #[serde(default)]
-    pub import_dict: Option<ImportDictConfig>,
+    pub import_dict: Option<import_dict::Config>,
    pub log_level: String,
 }
@ -48,3 +48,47 @@ impl AppConfig {
            .context("Failed to deserialize Config")
    }
 }
 // TODO: move?
 use applib::sys_major::{self as major, LenValueMap};
 use applib::{DictRepository, SystemDecoder, SystemEncoder};
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
 pub enum System {
    #[serde(rename = "major_en")]
    MajorEn,
    #[serde(rename = "major_pl")]
    MajorPl,
 }
 // from:
 impl From<&str> for System {
    fn from(s: &str) -> Self {
        match s {
            "major_en" => System::MajorEn,
            "major_pl" => System::MajorPl,
            _ => panic!("Unknown system: {}", s),
        }
    }
 }
 pub fn create_decoder(system: &System) -> Box<dyn SystemDecoder> {
    match system {
        System::MajorPl => Box::new(major::Decoder::new(major::rules_pl::get_rules())),
        System::MajorEn => Box::new(major::Decoder::new(major::rules_en::get_rules())),
    }
 }
 pub async fn create_encoder(system: &System, dict: &dyn DictRepository) -> Box<dyn SystemEncoder> {
    // let decoder = create_decoder(&system);
    let decoder = major::Decoder::new(match system {
        System::MajorPl => major::rules_pl::get_rules(),
        System::MajorEn => major::rules_en::get_rules(),
    });
    let stream = dict.stream_batches(100).await.unwrap(); // TODO
    let lvmap = LenValueMap::from_stream(stream, &decoder).await.unwrap(); // TODO
    match system {
        System::MajorPl => Box::new(major::Encoder::new(lvmap)),
        System::MajorEn => Box::new(major::Encoder::new(lvmap)),
    }
 }
--- a/app/src/container.rs
+++ b/app/src/container.rs
@ -2,8 +2,8 @@ use std::sync::Arc;
 // use crate::config::AppConfig;
 use applib::DictImporter;
-use applib::infrastructure::sqlite_dict_repository::SqliteDictRepository;
+use applib::DictRepository;
-use applib::traits::DictRepository;
+use applib::SqliteDictRepository;
 #[derive(Clone)]
 pub struct Container;
--- a/lib/migrations/001_initial.sql
+++ b/lib/migrations/001_initial.sql
@ -1,8 +0,0 @@
 -- Create dicts table
 CREATE TABLE IF NOT EXISTS dicts (
    name TEXT PRIMARY KEY,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP
 );
 -- Note: Individual dict entry tables will be created dynamically when dicts are created
 -- This is because each dict needs its own table with the dict name in the table name
--- a/lib/migrations/002_add_updated_at.sql
+++ b/lib/migrations/002_add_updated_at.sql
@ -1,6 +0,0 @@
 -- This migration adds the updated_at column to existing dict entry tables
 -- Since dict entry tables are created dynamically, we need to handle this differently
 -- For now, we'll drop and recreate existing tables to ensure they have the correct schema
 -- Note: In a production environment, you would want to handle this more carefully
 -- by checking each existing dict_entries_* table and adding the column if it doesn't exist
--- a/lib/src/common.rs
+++ b/lib/src/common.rs
@ -0,0 +1,7 @@
 pub mod entities;
 pub mod errors;
 pub mod traits;
 pub use self::traits::DictRepository;
 pub use self::traits::SystemDecoder;
 pub use self::traits::SystemEncoder;
--- a/lib/src/common/entities.rs
+++ b/lib/src/common/entities.rs
--- a/lib/src/common/errors.rs
+++ b/lib/src/common/errors.rs
--- a/lib/src/common/traits.rs
+++ b/lib/src/common/traits.rs
@ -0,0 +1,41 @@
 use futures::stream::BoxStream;
 use crate::common::{
    entities::{DecodedValue, Dict, DictEntry, EncodedValue},
    errors::{CodecError, RepositoryError},
 };
 pub trait SystemDecoder: Send + Sync {
    fn decode(&self, word: &str) -> Result<DecodedValue, CodecError>;
 }
 pub trait SystemEncoder: Send + Sync {
    fn initialize(&self) -> Result<(), CodecError>;
    fn encode(&self, word: &str) -> Result<EncodedValue, CodecError>;
 }
 #[async_trait::async_trait]
 pub trait DictRepository: Send + Sync {
    fn use_dict(&mut self, name: &str);
    async fn create_dict(&self) -> Result<(), RepositoryError>;
    /// "Upsert" logic:
    /// - If entry exists (by text), update metadata.
    /// - If not, insert new.
    /// - IDs are handled by the Database.
    async fn save_entries(&self, entries: &[DictEntry]) -> Result<(), RepositoryError>;
    /// Fetch a page of entries.
    async fn fetch_many(&self, limit: usize, offset: usize) -> Result<Dict, RepositoryError>;
    /// Returns a cold stream that fetches strings in chunks.
    /// The stream yields `Result<Vec<String>, RepositoryError>`.
    async fn stream_batches(
        &self,
        batch_size: usize,
    ) -> Result<BoxStream<'_, Result<Vec<String>, RepositoryError>>, RepositoryError>;
 }
 pub trait DictSource {
    fn next_entry(&mut self) -> Option<Result<DictEntry, anyhow::Error>>;
 }
--- a/lib/src/dictionary.rs
+++ b/lib/src/dictionary.rs
@ -0,0 +1,6 @@
 mod dict_importer;
 mod infrastructure;
 pub use self::dict_importer::DictImporter;
 pub use self::infrastructure::json_file_dict_source::JsonFileDictSource;
 pub use self::infrastructure::sqlite_dict_repository::SqliteDictRepository;
--- a/lib/src/dictionary/dict_importer.rs
+++ b/lib/src/dictionary/dict_importer.rs
@ -0,0 +1,59 @@
 use std::sync::Arc;
 use crate::common::traits::{DictRepository, DictSource};
 pub struct DictImporter {
    repo: Arc<dyn DictRepository>,
    batch_size: usize,
 }
 impl DictImporter {
    pub fn new(repo: Arc<dyn DictRepository>) -> Self {
        Self {
            repo,
            batch_size: 1000, // reasonable default
        }
    }
    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
        self.batch_size = batch_size;
        self
    }
    pub async fn import(&self, mut source: impl DictSource) -> Result<(), anyhow::Error> {
        // 1. Ensure Dict exists (Logic: Create if new, or maybe clear existing?)
        self.repo.create_dict().await?;
        let mut batch = Vec::with_capacity(self.batch_size);
        // 2. Stream data
        while let Some(result) = source.next_entry() {
            match result {
                Ok(entry) => {
                    // Optional: Domain Validation logic could go here
                    // if entry.text.is_empty() { continue; }
                    batch.push(entry);
                    // 3. Batch Write
                    if batch.len() >= self.batch_size {
                        self.repo.save_entries(&batch).await?;
                        batch.clear();
                    }
                }
                Err(e) => {
                    // Logic: Do we abort on malformed JSON or log and continue?
                    // Here we abort for safety.
                    return Err(e);
                }
            }
        }
        // 4. Flush remaining
        if !batch.is_empty() {
            self.repo.save_entries(&batch).await?;
        }
        Ok(())
    }
 }
--- a/lib/src/dictionary/infrastructure.rs
+++ b/lib/src/dictionary/infrastructure.rs
@ -0,0 +1,2 @@
 pub mod json_file_dict_source;
 pub mod sqlite_dict_repository;
--- a/lib/src/dictionary/infrastructure/json_file_dict_source.rs
+++ b/lib/src/dictionary/infrastructure/json_file_dict_source.rs
@ -0,0 +1,85 @@
 use crate::common::entities::DictEntry;
 use crate::common::traits::DictSource;
 use serde::Deserialize;
 use std::collections::HashMap;
 use std::fs::File;
 use std::io::BufReader;
 use std::path::Path;
 // The "Wire Format".
 // It exists ONLY here to map external JSON names to internal Entity names.
 #[derive(Deserialize)]
 struct JsonEntry {
    word: String,
    metadata: Option<HashMap<String, serde_json::Value>>,
 }
 pub struct JsonFileDictSource {
    entries: Vec<DictEntry>,
    current_index: usize,
    next_id: u32,
 }
 impl JsonFileDictSource {
    pub fn new<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
        let file = File::open(path)?;
        let reader = BufReader::new(file);
        // Parse as JSON array
        let json_entries: Vec<JsonEntry> = serde_json::from_reader(reader)?;
        // Convert to DictEntry with auto-generated IDs
        let mut entries = Vec::new();
        for (index, json_entry) in json_entries.into_iter().enumerate() {
            let id = (index + 1) as u64; // Auto-generate ID starting from 1
            // Convert metadata from serde_json::Value to HashMap<String, String>
            let metadata = if let Some(meta) = json_entry.metadata {
                meta.into_iter()
                    .map(|(k, v)| {
                        (
                            k,
                            match v {
                                serde_json::Value::String(s) => s,
                                _ => v.to_string(),
                            },
                        )
                    })
                    .collect()
            } else {
                HashMap::new()
            };
            entries.push(DictEntry {
                id: Some(id),
                text: json_entry.word,
                metadata,
            });
        }
        let entries_len = entries.len();
        Ok(Self {
            entries,
            current_index: 0,
            next_id: (entries_len + 1) as u32,
        })
    }
    pub fn new_with_existing_ids<P: AsRef<Path>>(path: P, start_id: u32) -> anyhow::Result<Self> {
        let mut source = Self::new(path)?;
        source.next_id = start_id;
        Ok(source)
    }
 }
 impl DictSource for JsonFileDictSource {
    fn next_entry(&mut self) -> Option<Result<DictEntry, anyhow::Error>> {
        if self.current_index < self.entries.len() {
            let entry = self.entries[self.current_index].clone();
            self.current_index += 1;
            Some(Ok(entry))
        } else {
            None
        }
    }
 }
--- a/lib/src/dictionary/infrastructure/sqlite_dict_repository.rs
+++ b/lib/src/dictionary/infrastructure/sqlite_dict_repository.rs
@ -0,0 +1,231 @@
 use crate::common::entities::{Dict, DictEntry};
 use crate::common::errors::RepositoryError;
 use crate::common::traits::DictRepository;
 use futures::TryStreamExt;
 use futures::stream::BoxStream;
 use sqlx::{Row, SqlitePool, sqlite::SqliteConnectOptions};
 use std::collections::HashMap;
 use std::str::FromStr;
 #[derive(sqlx::FromRow)]
 struct SqliteEntryDto {
    id: i64,
    text: String,
    // sqlx reads the DB column into this specific wrapper
    metadata: sqlx::types::Json<HashMap<String, String>>,
 }
 // Mapper: DTO -> Domain Entity
 impl From<SqliteEntryDto> for DictEntry {
    fn from(dto: SqliteEntryDto) -> Self {
        Self {
            id: Some(dto.id as u64),
            text: dto.text,
            // Unwrap the sqlx wrapper to get the inner HashMap
            metadata: dto.metadata.0,
        }
    }
 }
 // --- REPOSITORY IMPLEMENTATION ---
 #[derive(Clone)]
 pub struct SqliteDictRepository {
    pool: SqlitePool,
    dict_name: String,
 }
 impl SqliteDictRepository {
    pub async fn new(database_url: &str) -> Result<Self, RepositoryError> {
        let options = SqliteConnectOptions::from_str(database_url)
            .map_err(|_| RepositoryError::ConnectionFailed)?
            .create_if_missing(true);
        let pool = SqlitePool::connect_with(options)
            .await
            .map_err(|_| RepositoryError::ConnectionFailed)?;
        // Ensure tables exist with proper Normalization and Constraints
        sqlx::query(
            r#"
            CREATE TABLE IF NOT EXISTS dictionaries (
                id INTEGER PRIMARY KEY,
                name TEXT NOT NULL UNIQUE,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP
            );
            CREATE TABLE IF NOT EXISTS entries (
                id INTEGER PRIMARY KEY,
                dictionary_id INTEGER NOT NULL,
                text TEXT NOT NULL,
                metadata TEXT,
                updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY(dictionary_id) REFERENCES dictionaries(id) ON DELETE CASCADE,
                -- This constraint allows us to update existing words instead of duplicating them
                UNIQUE(dictionary_id, text)
            );
            "#,
        )
        .execute(&pool)
        .await
        .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
        Ok(Self {
            pool: pool,
            dict_name: "default_dict".into(),
        })
    }
    // Helper: Resolve dictionary name to ID
    async fn get_dict_id(&self) -> Result<i64, RepositoryError> {
        let row = sqlx::query("SELECT id FROM dictionaries WHERE name = ?")
            .bind(&self.dict_name)
            .fetch_optional(&self.pool)
            .await
            .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
        match row {
            Some(r) => Ok(r.get("id")),
            None => Err(RepositoryError::NotFound(self.dict_name.clone())),
        }
    }
 }
 #[async_trait::async_trait]
 impl DictRepository for SqliteDictRepository {
    async fn create_dict(&self) -> Result<(), RepositoryError> {
        sqlx::query("INSERT OR IGNORE INTO dictionaries (name) VALUES (?)")
            .bind(&self.dict_name)
            .execute(&self.pool)
            .await
            .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
        Ok(())
    }
    fn use_dict(&mut self, name: &str) {
        self.dict_name = name.to_string();
    }
    async fn save_entries(&self, entries: &[DictEntry]) -> Result<(), RepositoryError> {
        let mut tx = self
            .pool
            .begin()
            .await
            .map_err(|_| RepositoryError::ConnectionFailed)?;
        // 1. Get Dict ID
        let dict_id_row = sqlx::query("SELECT id FROM dictionaries WHERE name = ?")
            .bind(&self.dict_name)
            .fetch_optional(&mut *tx)
            .await
            .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
        let dict_id: i64 = match dict_id_row {
            Some(row) => row.get("id"),
            None => return Err(RepositoryError::NotFound(self.dict_name.clone())),
        };
        // 2. Batch Upsert
        for entry in entries {
            // We must wrap the HashMap in sqlx::types::Json so SQLx knows how to serialize it
            let meta_json = sqlx::types::Json(&entry.metadata);
            sqlx::query(
                r#"
                INSERT INTO entries (dictionary_id, text, metadata)
                VALUES (?, ?, ?)
                ON CONFLICT(dictionary_id, text) DO UPDATE SET
                    metadata = excluded.metadata,
                    updated_at = CURRENT_TIMESTAMP
                "#,
            )
            .bind(dict_id)
            .bind(&entry.text)
            .bind(meta_json)
            .execute(&mut *tx)
            .await
            .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
        }
        tx.commit()
            .await
            .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
        Ok(())
    }
    async fn fetch_many(&self, limit: usize, offset: usize) -> Result<Dict, RepositoryError> {
        // Get Dict ID
        let dict_id = self.get_dict_id().await?;
        // Query (Reading into the DTO)
        let dtos = sqlx::query_as::<_, SqliteEntryDto>(
            r#"
            SELECT id, text, metadata
            FROM entries
            WHERE dictionary_id = ?
            LIMIT ? OFFSET ?
            "#,
        )
        .bind(dict_id)
        .bind(limit as u32)
        .bind(offset as u32)
        .fetch_all(&self.pool)
        .await
        .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
        // 4. Convert DTOs to Domain Dict
        let mut entries_map = HashMap::new();
        for dto in dtos {
            let entry: DictEntry = dto.into(); // Converts DTO -> Entity
            // We safely unwrap because the DB guarantees an ID exists
            if let Some(id) = entry.id {
                entries_map.insert(id, entry);
            }
        }
        Ok(Dict {
            name: self.dict_name.clone(),
            entries: entries_map,
        })
    }
    async fn stream_batches(
        &self,
        batch_size: usize,
    ) -> Result<BoxStream<'_, Result<Vec<String>, RepositoryError>>, RepositoryError> {
        // 1. Resolve ID first
        let dict_id = self.get_dict_id().await?;
        // 2. Create the base query stream.
        // We do NOT use limit/offset. We let the DB stream rows via a cursor.
        let query_stream = sqlx::query("SELECT text FROM entries WHERE dictionary_id = ?")
            .bind(dict_id)
            .fetch(&self.pool);
        // 3. Transform the stream using Functional combinators
        let stream = query_stream
            // Map SQLx errors to Domain errors
            .map_err(|e| RepositoryError::StorageError(e.to_string()))
            // Extract the String from the Row
            .and_then(|row| async move {
                // 'text' is the column name
                let text: String = row
                    .try_get("text")
                    .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
                Ok(text)
            })
            // Group items into vectors of size `batch_size`
            .try_chunks(batch_size)
            // try_chunks returns a specific error type on failure, map it back
            .map_err(|e| {
                // logic to handle leftover elements if error occurs,
                // but for simplicity, we treat stream errors as fatal here
                RepositoryError::StorageError(e.to_string())
            });
        // 4. Box the stream to erase the complex iterator type (Type Erasure)
        Ok(Box::pin(stream))
    }
 }
--- a/lib/src/lib.rs
+++ b/lib/src/lib.rs
@ -1,10 +1,20 @@
-pub mod application;
+// pub mod application;
-pub mod core;
+// pub mod core;
-pub mod infrastructure;
+// pub mod infrastructure;
-pub mod presentation;
+// pub mod presentation;
-pub use self::application::config;
+// pub use self::application::config;
-pub use self::application::services::DictImporter;
+// pub use self::application::services::DictImporter;
-pub use self::core::system;
+// pub use self::core::system;
-pub use self::core::traits;
+// pub use self::core::traits;
-// pub use self::presentation::cli; // Removed as we are deleting it
+
 mod common;
 mod dictionary;
 pub mod sys_major;
 pub use self::common::DictRepository;
 pub use self::common::SystemDecoder;
 pub use self::common::SystemEncoder;
 pub use self::dictionary::DictImporter;
 pub use self::dictionary::JsonFileDictSource;
 pub use self::dictionary::SqliteDictRepository;
--- a/lib/src/presentation/cli.rs
+++ b/lib/src/presentation/cli.rs
@ -1,9 +0,0 @@
 pub mod cli_args;
 pub mod commands;
 pub mod defaults;
 pub mod traits;
 pub use self::cli_args::{
    CliArgs, Command, DecodeArgs, EncodeArgs, GlobalArgs, ImportDictArgs, ServerArgs,
 };
 pub use self::traits::ConfigurableCommand;
--- a/lib/src/presentation/server.rs
+++ b/lib/src/presentation/server.rs
@ -1,20 +0,0 @@
 use crate::application::config::ServerConfig;
 use tracing::info;
 pub async fn run(config: ServerConfig, blocker: impl std::future::Future<Output = ()>) {
    info!("Running server with config: {:#?}", config);
    tokio::select! {
        _ = server_loop() => {},
        _ = blocker => {
            info!("Shutting down server...");
        }
    }
 }
 async fn server_loop() {
    loop {
        tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
        info!("Health check... ");
    }
 }
--- a/lib/src/sys_major.rs
+++ b/lib/src/sys_major.rs
@ -0,0 +1,12 @@
 pub mod decoder;
 pub mod encoder;
 pub mod lvmap;
 pub mod rules_en; // TODO: pub?
 pub mod rules_pl; // TODO: pub?
 pub use self::decoder::Decoder;
 pub use self::encoder::Encoder;
 pub use self::lvmap::LenValueMap; // TODO: pub?
 #[cfg(test)]
 mod decoder_tests;
--- a/lib/src/sys_major/decoder.rs
+++ b/lib/src/sys_major/decoder.rs
@ -0,0 +1,122 @@
 use crate::common::{entities::DecodedValue, errors::CodecError, traits::SystemDecoder};
 #[derive(Debug, Default, Clone)]
 pub struct Rule {
    pub phoneme_in: String,
    pub phoneme_out: String,
    pub not_before: Vec<String>,
    pub not_after: Vec<String>,
    pub only_before: Vec<String>,
    pub only_after: Vec<String>,
 }
 impl Rule {
    pub fn into_lowercase(self) -> Self {
        Rule {
            phoneme_in: self.phoneme_in.to_lowercase(),
            phoneme_out: self.phoneme_out.to_lowercase(),
            not_before: Self::lower_vec(self.not_before),
            not_after: Self::lower_vec(self.not_after),
            only_before: Self::lower_vec(self.only_before),
            only_after: Self::lower_vec(self.only_after),
        }
    }
    fn lower_vec(vec: Vec<String>) -> Vec<String> {
        vec.into_iter().map(|s| s.to_lowercase()).collect()
    }
 }
 pub type Rules = Vec<Rule>;
 // pub struct rules {
 //     name: String,
 //     entries: Rules,
 // }
 /// (index, decoded value)
 type RuleMatches = Vec<(usize, String)>;
 pub struct Decoder {
    rules: Rules,
 }
 impl Decoder {
    pub fn new(rules: Rules) -> Self {
        Decoder {
            rules: Decoder::to_lower_rules(rules),
        }
    }
    fn to_lower_rules(rules: Rules) -> Rules {
        rules
            .into_iter()
            .map(|entry| entry.into_lowercase())
            .collect()
    }
    fn match_entry(&self, entry: &Rule, word: &str) -> RuleMatches {
        word.match_indices(&entry.phoneme_in)
            .filter(|(index, _)| self.is_context_matched(&entry, &word, *index))
            .map(|(index, _)| (index, entry.phoneme_out.clone()))
            .collect()
    }
    fn is_context_matched(&self, entry: &Rule, word: &str, index: usize) -> bool {
        let before_context = &word[..index];
        let after_context = &word[index + entry.phoneme_in.len()..];
        // dbg!(&before_context);
        // dbg!(&after_context);
        if entry
            .not_after
            .iter()
            .any(|prefix| before_context.ends_with(prefix))
        {
            return false;
        }
        if entry
            .not_before
            .iter()
            .any(|suffix| after_context.starts_with(suffix))
        {
            return false;
        }
        if !entry.only_after.is_empty()
            && entry
                .only_after
                .iter()
                .all(|prefix| !before_context.ends_with(prefix))
        {
            return false;
        }
        if !entry.only_before.is_empty()
            && entry
                .only_before
                .iter()
                .all(|suffix| !after_context.starts_with(suffix))
        {
            return false;
        }
        true
    }
 }
 impl SystemDecoder for Decoder {
    fn decode(&self, word: &str) -> Result<DecodedValue, CodecError> {
        let mut matches: RuleMatches = self
            .rules
            .iter()
            .flat_map(|entry| self.match_entry(&entry, &word.to_lowercase()))
            .collect();
        matches.sort_by_key(|&(pos, _)| pos);
        let num_str: String = matches.into_iter().map(|(_, value)| value).collect();
        DecodedValue::new(num_str)
    }
 }
--- a/lib/src/sys_major/decoder_tests.rs
+++ b/lib/src/sys_major/decoder_tests.rs
@ -0,0 +1,134 @@
 use super::decoder::{Decoder, Rule, Rules};
 use crate::common::traits::SystemDecoder;
 #[cfg(test)]
 mod tests {
    use super::*;
    fn create_single_rules() -> Rules {
        vec![Rule {
            phoneme_in: "B".to_string(),
            phoneme_out: "2".to_string(),
            not_after: vec!["Y".to_string()],
            not_before: vec!["X".to_string()],
            only_after: vec!["A".to_string()],
            only_before: vec!["C".to_string()],
        }]
    }
    fn create_single_rules_min() -> Rules {
        vec![Rule {
            phoneme_in: "B".to_string(),
            phoneme_out: "2".to_string(),
            ..Default::default()
        }]
    }
    fn create_double_rules() -> Rules {
        vec![
            Rule {
                phoneme_in: "CD".to_string(),
                phoneme_out: "2".to_string(),
                not_after: vec!["00".to_string(), "YZ".to_string()],
                not_before: vec!["11".to_string(), "WX".to_string()],
                only_after: vec!["22".to_string(), "AB".to_string()],
                only_before: vec!["33".to_string(), "EF".to_string()],
            },
            Rule {
                phoneme_in: "MN".to_string(),
                phoneme_out: "3".to_string(),
                ..Default::default()
            },
        ]
    }
    #[test]
    fn test_single_symbol_encoding_only_before_only_after_matched() {
        let decoder = Decoder::new(create_single_rules());
        let output = decoder.decode("ABC").unwrap();
        assert_eq!(output, "2")
    }
    #[test]
    fn test_double_symbol_encoding_only_before_only_after_matched() {
        let decoder = Decoder::new(create_double_rules());
        let output = decoder.decode("ABCDEF").unwrap();
        assert_eq!(output, "2")
    }
    #[test]
    fn test_single_symbol_encoding_only_before_not_matched_with_other() {
        let decoder = Decoder::new(create_single_rules());
        let output = decoder.decode("DBC").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_double_symbol_encoding_only_before_not_matched_with_other() {
        let decoder = Decoder::new(create_double_rules());
        let output = decoder.decode("AACDEE").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_case_insensitivity() {
        let decoder = Decoder::new(create_double_rules());
        let output = decoder.decode("abcdef").unwrap();
        assert_eq!(output, "2")
    }
    #[test]
    fn test_single_symbol_encoding_only_before_not_matched_with_empty() {
        let decoder = Decoder::new(create_single_rules());
        let output = decoder.decode("BC").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_single_symbol_encoding_only_before_not_matched_with_not_before() {
        let decoder = Decoder::new(create_single_rules());
        let output = decoder.decode("XBC").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_single_symbol_encoding_only_after_not_matched_with_other() {
        let decoder = Decoder::new(create_single_rules());
        let output = decoder.decode("ABD").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_single_symbol_encoding_only_after_not_matched_with_empty() {
        let decoder = Decoder::new(create_single_rules());
        let output = decoder.decode("AB").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_single_symbol_encoding_only_after_not_matched_with_not_after() {
        let decoder = Decoder::new(create_single_rules());
        let output = decoder.decode("ABY").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_single_symbol_encoding_empty_before_after_matched_with_empty() {
        let decoder = Decoder::new(create_single_rules_min());
        let output = decoder.decode("B").unwrap();
        assert_eq!(output, "2")
    }
    #[test]
    fn test_single_symbol_encoding_empty_before_after_matched_with_others() {
        let decoder = Decoder::new(create_single_rules_min());
        let output = decoder.decode("AXBYC").unwrap();
        assert_eq!(output, "2")
    }
    #[test]
    fn test_encoding_multiple_phonemes() {
        let decoder = Decoder::new(create_double_rules());
        let output = decoder.decode("VvmNabCd33mn00CD22cdefmn").unwrap();
        assert_eq!(output, "32323")
    }
 }
--- a/lib/src/sys_major/encoder.rs
+++ b/lib/src/sys_major/encoder.rs
@ -0,0 +1,179 @@
 use crate::common::{
    entities::{EncodedPart, EncodedSplit, EncodedValue},
    errors::CodecError,
    traits::*,
 };
 use super::lvmap::LenValueMap;
 #[derive(Debug)]
 pub struct Encoder {
    lv_map: LenValueMap,
 }
 impl Encoder {
    pub fn new(lv_map: LenValueMap) -> Self {
        Encoder { lv_map }
    }
 }
 impl SystemEncoder for Encoder {
    fn initialize(&self) -> Result<(), CodecError> {
        Ok(())
    }
    fn encode(&self, input: &str) -> Result<EncodedValue, CodecError> {
        let size = input.chars().count();
        let max_mask: usize = (1 << (size - 1)) - 1;
        let indices: Vec<usize> = input.char_indices().map(|(i, _)| i).collect();
        let mut results = Vec::with_capacity(max_mask);
        for mask in 0..=max_mask {
            let mut parts: Vec<String> = Vec::new();
            let mut last_split = input.char_indices().count(); // we go from right to left to start with the longest parts
            // Iterate through the mask bits to find where to split
            for i in 0..size - 1 {
                // Check if the i-th bit is set
                if (mask >> i) & 1 == 1 {
                    // The split corresponds to the byte index of the (i+1)-th character
                    let split_idx = indices[indices.len() - i - 1];
                    parts.push(input[split_idx..last_split].to_string());
                    last_split = split_idx;
                }
            }
            // Push the remaining part of the string
            parts.push(input[..last_split].to_string());
            let mut all_matched = true;
            let mut split = EncodedSplit::new();
            parts.reverse();
            for part in &parts {
                let Ok(num_part) = part.parse::<u64>() else {
                    all_matched = false;
                    break;
                };
                let Some(words) = self.lv_map.get(part.len() as u8, num_part) else {
                    all_matched = false;
                    break;
                };
                split.push(EncodedPart {
                    value: num_part,
                    words: words.clone(),
                });
            }
            if all_matched {
                results.push(Partition {
                    value: split,
                    // To find the "most equal" size, we minimize the sum of squared lengths.
                    // (This mathematically minimizes variance without needing floating point math)
                    sum_sq_len: parts.iter().map(|p| p.chars().count().pow(2)).sum(),
                });
            }
            // Calculate metrics for sorting
            // let num_parts = parts.len();
            // // To find the "most equal" size, we minimize the sum of squared lengths.
            // // (This mathematically minimizes variance without needing floating point math)
            // let sum_sq_len: usize = parts.iter().map(|p| p.chars().count().pow(2)).sum();
            // if let Some(words) = self.lv_map.get(size as u8, input.parse().unwrap()) {
            //     results.push(Partition {
            //         parts: words.clone(),
            //         sum_sq_len,
            //     });
            // }
        }
        // Ok(EncodedValue::new(words))
        // Sort by:
        // 1. Fewer parts first (1 part, then 2 parts...)
        // 2. Most equal lengths (lower sum of squared lengths is more balanced)
        // 3. TODO: Lexicographically (for deterministic stability)?
        results.sort_by(|a, b| {
            a.value
                .len()
                .cmp(&b.value.len())
                .then(a.sum_sq_len.cmp(&b.sum_sq_len))
        });
        // Extract just the strings
        let split_results = results.into_iter().map(|p| p.value).collect();
        Ok(EncodedValue::new(split_results))
    }
 }
 // A helper struct to keep the split variant and its sort metrics together
 struct Partition {
    value: EncodedSplit,
    sum_sq_len: usize,
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    #[test]
    fn test_encode_as_single_length_result() {
        let mut lvmap = LenValueMap::new();
        lvmap.push(3, 123, "test_123");
        lvmap.push(3, 345, "test_345_1");
        lvmap.push(3, 345, "test_345_2");
        lvmap.push(3, 678, "test_678");
        let encoder = Encoder::new(lvmap);
        let result = encoder.encode("345").unwrap();
        assert_eq!(result.len(), 1); // single split
        assert_eq!(result[0].len(), 1); // single part
        assert_eq!(result[0][0].value, 345);
        assert_eq!(result[0][0].words.len(), 2); // two words
        assert_eq!(result[0][0].words[0], "test_345_1");
        assert_eq!(result[0][0].words[1], "test_345_2");
    }
    #[test]
    fn test_encode_as_all_lengths() {
        let mut lvmap = LenValueMap::new();
        lvmap.push(1, 0, "test_0");
        lvmap.push(1, 9, "test_9");
        lvmap.push(1, 8, "test_8");
        lvmap.push(1, 7, "test_7");
        lvmap.push(2, 98, "test_98");
        lvmap.push(2, 87, "test_87");
        lvmap.push(3, 987, "test_987");
        lvmap.push(3, 876, "test_876");
        let encoder = Encoder::new(lvmap);
        let result = encoder.encode("987").unwrap();
        assert_eq!(result.len(), 4); // 987, 98|7, 9|87, 9|8|7
        assert_eq!(result[0].len(), 1); // 987
        assert_eq!(result[0][0].words.len(), 1);
        assert_eq!(result[0][0].words[0], "test_987");
        assert_eq!(result[1].len(), 2); // 98|7
        assert_eq!(result[1][0].words.len(), 1);
        assert_eq!(result[1][0].words[0], "test_98");
        assert_eq!(result[1][1].words.len(), 1);
        assert_eq!(result[1][1].words[0], "test_7");
        assert_eq!(result[2].len(), 2); // 9|87
        assert_eq!(result[2][0].words.len(), 1);
        assert_eq!(result[2][0].words[0], "test_9");
        assert_eq!(result[2][1].words.len(), 1);
        assert_eq!(result[2][1].words[0], "test_87");
        assert_eq!(result[3].len(), 3); // 9|8|7
        assert_eq!(result[3][0].words.len(), 1);
        assert_eq!(result[3][0].words[0], "test_9");
        assert_eq!(result[3][1].words.len(), 1);
        assert_eq!(result[3][1].words[0], "test_8");
        assert_eq!(result[3][2].words.len(), 1);
        assert_eq!(result[3][2].words[0], "test_7");
    }
 }
--- a/lib/src/sys_major/lvmap.rs
+++ b/lib/src/sys_major/lvmap.rs
@ -0,0 +1,351 @@
 use crate::common::{
    SystemDecoder,
    entities::DecodedLength,
    errors::{CodecError, RepositoryError},
 };
 use futures::{Stream, StreamExt};
 use std::{collections::HashMap, num::ParseIntError};
 use thiserror::Error;
 // We store words by encoded number length, then encoded value
 // Example:
 // root:
 //   - 3:
 //     - 750:
 //       - word: klasa
 //       - word: gilza
 //     - 849:
 //       - word: farba
 //   - 2:
 //     - 45:
 //       - word: oral
 #[derive(Error, Debug)]
 pub enum LenValueMapError {
    #[error("value parsing error: {0}")]
    Parse(#[from] ParseIntError),
    #[error(transparent)]
    Codec(#[from] CodecError),
    #[error(transparent)]
    Repository(#[from] RepositoryError),
    #[error("unable to build encoder data: {0}")]
    Build(String),
 }
 type DecodedNumber = u64;
 pub type LenValueData = HashMap<DecodedLength, HashMap<DecodedNumber, Vec<String>>>;
 #[derive(Debug, Default, Clone)]
 pub struct LenValueMap {
    data: LenValueData,
 }
 impl LenValueMap {
    pub fn new() -> Self {
        Self::default()
    }
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }
    pub fn into_data(self) -> LenValueData {
        self.data
    }
    pub fn push(&mut self, len: u8, num: DecodedNumber, word: impl Into<String>) -> &mut Self {
        self.data
            .entry(DecodedLength::from(len))
            .or_insert_with(HashMap::new)
            .entry(num)
            .or_insert_with(Vec::new)
            .push(word.into());
        self
    }
    pub fn get(&self, len: u8, num: DecodedNumber) -> Option<&Vec<String>> {
        self.data.get(&DecodedLength::from(len))?.get(&num)
    }
    pub fn insert_words<I>(
        &mut self,
        words: I,
        decoder: &dyn SystemDecoder,
    ) -> Result<(), LenValueMapError>
    where
        I: IntoIterator<Item = String>,
    {
        for word in words {
            if word.is_empty() {
                continue;
            }
            let decoded = decoder.decode(&word)?;
            if decoded.is_empty() {
                continue;
            }
            self.data
                .entry(decoded.value_len()?)
                .or_default()
                .entry(decoded.parse()?)
                .or_default()
                .push(word);
        }
        Ok(())
    }
    pub fn from_data(data: LenValueData) -> Self {
        Self { data: data }
    }
    pub async fn from_stream<S>(
        stream: S,
        decoder: &dyn SystemDecoder,
    ) -> Result<Self, LenValueMapError>
    where
        // S is a stream of "Result<Vec<String>, Error>"
        S: Stream<Item = Result<Vec<String>, RepositoryError>>,
    {
        let mut map = LenValueMap::new();
        let mut stream = Box::pin(stream);
        // We stream the batches one by one.
        // This ensures only one batch is in memory at a time.
        while let Some(batch_result) = stream.next().await {
            match batch_result {
                Ok(batch) => {
                    // We delegate to the synchronous logic for the heavy lifting
                    map.insert_words(batch, decoder)?;
                }
                Err(e) => {
                    // Convert RepositoryError to LenValueMapError::Build
                    return Err(e.into());
                }
            }
        }
        Ok(map)
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::common::{entities::*, errors::*};
    use futures::stream;
    use std::collections::HashMap;
    use mockall::{mock, predicate::*};
    const TEST_WORD_1: &str = "test_word_1";
    const TEST_WORD_2: &str = "test_word_2";
    const TEST_WORD_3: &str = "test_word_3";
    const TEST_WORD_4: &str = "test_word_4";
    const TEST_NUM_1: u64 = 12;
    const TEST_NUM_2: u64 = 34;
    const TEST_NUM_3: u64 = 9876;
    const TEST_NUM_1_LEN: DecodedLength = DecodedLength::from(2);
    const TEST_NUM_3_LEN: DecodedLength = DecodedLength::from(4);
    fn decoded_value(n: u64) -> DecodedValue {
        DecodedValue::new(n.to_string()).unwrap()
    }
    fn get_test_dec_map() -> HashMap<String, DecodedValue> {
        HashMap::from([
            (TEST_WORD_1.to_string(), decoded_value(TEST_NUM_1)),
            (TEST_WORD_2.to_string(), decoded_value(TEST_NUM_2)),
            (TEST_WORD_3.to_string(), decoded_value(TEST_NUM_3)),
            (TEST_WORD_4.to_string(), decoded_value(TEST_NUM_3)),
        ])
    }
    fn mock_decoding(word: &str) -> Result<DecodedValue, CodecError> {
        get_test_dec_map()
            .remove(word)
            .ok_or_else(|| CodecError::UnexpectedError("".to_string()))
    }
    fn get_test_words() -> Vec<String> {
        vec![
            TEST_WORD_1.to_string(),
            TEST_WORD_2.to_string(),
            TEST_WORD_3.to_string(),
            TEST_WORD_4.to_string(),
        ]
    }
    mock! {
        pub Decoder {}
        impl SystemDecoder for Decoder {
            fn decode(&self, word: &str) -> Result<DecodedValue, CodecError>;
        }
    }
    #[test]
    fn test_single_word() {
        let words = vec![TEST_WORD_1.to_string()];
        let mut decoder = MockDecoder::new();
        decoder
            .expect_decode()
            .returning(|word| mock_decoding(word));
        let mut lv_map = LenValueMap::new();
        lv_map.insert_words(words, &decoder).unwrap();
        let data = lv_map.into_data();
        assert_eq!(data.len(), 1);
        assert!(data.contains_key(&TEST_NUM_1_LEN));
        let data = data.get(&TEST_NUM_1_LEN).unwrap();
        assert!(data.contains_key(&TEST_NUM_1));
        let words = data.get(&TEST_NUM_1).unwrap();
        assert_eq!(words.len(), 1);
        assert_eq!(words[0], TEST_WORD_1);
    }
    #[test]
    fn test_multiple_words() {
        let words = get_test_words();
        let mut decoder = MockDecoder::new();
        decoder
            .expect_decode()
            .returning(|word| mock_decoding(word));
        let mut lv_map = LenValueMap::new();
        lv_map.insert_words(words, &decoder).unwrap();
        let data = lv_map.into_data();
        assert_eq!(data.len(), 2); // two different lengths
        assert!(data.contains_key(&TEST_NUM_1_LEN));
        assert!(data.contains_key(&TEST_NUM_3_LEN));
        let l2 = data.get(&TEST_NUM_1_LEN).unwrap();
        let l4 = data.get(&TEST_NUM_3_LEN).unwrap();
        assert_eq!(l2.len(), 2); // two numbers
        assert_eq!(l4.len(), 1); // one number
        assert!(l2.contains_key(&TEST_NUM_1));
        assert!(l2.contains_key(&TEST_NUM_2));
        assert!(l4.contains_key(&TEST_NUM_3));
        let words = l2.get(&TEST_NUM_1).unwrap();
        assert_eq!(words.len(), 1);
        assert_eq!(words[0], TEST_WORD_1);
        let words = l2.get(&TEST_NUM_2).unwrap();
        assert_eq!(words.len(), 1);
        assert_eq!(words[0], TEST_WORD_2);
        let words = l4.get(&TEST_NUM_3).unwrap();
        assert_eq!(words.len(), 2);
        assert!(words.contains(&TEST_WORD_3.to_string()));
        assert!(words.contains(&TEST_WORD_4.to_string()));
    }
    #[test]
    fn test_skip_empty_decodes() {
        let words = vec![TEST_WORD_1.to_string(), TEST_WORD_2.to_string()];
        let mut decoder = MockDecoder::new();
        decoder.expect_decode().returning(|word| {
            if word == TEST_WORD_1 {
                DecodedValue::new("".to_string())
            } else {
                DecodedValue::new(TEST_NUM_2.to_string())
            }
        });
        let mut lv_map = LenValueMap::new();
        lv_map.insert_words(words, &decoder).unwrap();
        let data = lv_map.into_data();
        assert_eq!(data.len(), 1);
        assert!(data.contains_key(&TEST_NUM_1_LEN));
        let data = data.get(&TEST_NUM_1_LEN).unwrap();
        assert!(data.contains_key(&TEST_NUM_2));
        let words = data.get(&TEST_NUM_2).unwrap();
        assert_eq!(words.len(), 1);
        assert_eq!(words[0], TEST_WORD_2);
    }
    #[test]
    fn test_decoder_error_propagates() {
        let mut decoder = MockDecoder::new();
        decoder
            .expect_decode()
            .returning(|_| Err(CodecError::UnexpectedError("boom".into())));
        let mut map = LenValueMap::new();
        let result = map.insert_words(vec!["x".into()], &decoder);
        assert!(result.is_err());
    }
    // --- build ---
    #[tokio::test]
    async fn test_from_stream_success() {
        // 1. Setup Mocks (Same as before)
        let mut decoder = MockDecoder::new();
        decoder
            .expect_decode()
            .returning(|word| mock_decoding(word));
        // 2. Prepare Data
        // We wrap the inner Vecs in Ok() because the stream expects Result<Vec<String>, RepositoryError>
        let batches = vec![
            Ok(vec![TEST_WORD_1.into(), TEST_WORD_2.into()]),
            Ok(vec![TEST_WORD_3.into(), TEST_WORD_4.into()]),
        ];
        // 3. Create a Stream from the Vec
        // stream::iter converts an IntoIterator into a Stream
        let stream = stream::iter(batches);
        // 4. Inject the stream (Dependency Injection)
        let map = LenValueMap::from_stream(stream, &decoder)
            .await
            .expect("Should build map successfully");
        // 5. Assertions
        let data = map.into_data();
        assert_eq!(data.len(), 2);
        assert!(data.contains_key(&TEST_NUM_1_LEN));
        assert!(data.contains_key(&TEST_NUM_3_LEN));
    }
    #[tokio::test]
    async fn test_from_stream_failure() {
        let mut decoder = MockDecoder::new();
        decoder
            .expect_decode()
            .returning(|word| mock_decoding(word));
        let batches = vec![
            Ok(vec![TEST_WORD_1.into()]),
            Err(RepositoryError::ConnectionFailed),
            Ok(vec![TEST_WORD_3.into()]),
        ];
        let stream = stream::iter(batches);
        let result = LenValueMap::from_stream(stream, &decoder).await;
        match result {
            // We match specifically on the Repository variant and the ConnectionFailed inner error
            Err(LenValueMapError::Repository(RepositoryError::ConnectionFailed)) => {
                // Success! The correct error type propagated up.
            }
            // If it's any other error (including a stringified one), we fail
            _ => panic!(
                "Expected LenValueMapError::Repository(ConnectionFailed), got {:?}",
                result
            ),
        }
    }
 }
--- a/lib/src/sys_major/removeme.old/application.rs
+++ b/lib/src/sys_major/removeme.old/application.rs
--- a/lib/src/sys_major/removeme.old/application/config.rs
+++ b/lib/src/sys_major/removeme.old/application/config.rs
--- a/lib/src/sys_major/removeme.old/application/errors.rs
+++ b/lib/src/sys_major/removeme.old/application/errors.rs
--- a/lib/src/sys_major/removeme.old/application/services.rs
+++ b/lib/src/sys_major/removeme.old/application/services.rs
--- a/lib/src/sys_major/removeme.old/application/traits.rs
+++ b/lib/src/sys_major/removeme.old/application/traits.rs
--- a/lib/src/sys_major/removeme.old/core.rs
+++ b/lib/src/sys_major/removeme.old/core.rs
--- a/lib/src/sys_major/removeme.old/core/entities.rs
+++ b/lib/src/sys_major/removeme.old/core/entities.rs
@ -0,0 +1,146 @@
 use super::errors::CodecError;
 use serde::Serialize;
 use std::num::ParseIntError;
 use std::ops::Deref;
 use std::{collections::HashMap, u64};
 /// A number encoded as a sequence of words
 #[derive(Debug, Clone, Serialize)]
 pub struct EncodedPart {
    pub value: u64,
    pub words: Vec<String>,
 }
 /// A way (variant) to split input number
 pub type EncodedSplit = Vec<EncodedPart>;
 /// A number encoded as words, split in multiple ways
 #[derive(Debug, Clone, Serialize)]
 pub struct EncodedValue(Vec<EncodedSplit>);
 impl EncodedValue {
    pub fn new(data: Vec<EncodedSplit>) -> Self {
        EncodedValue(data)
    }
 }
 impl Deref for EncodedValue {
    type Target = Vec<EncodedSplit>;
    fn deref(&self) -> &Self::Target {
        &self.0
    }
 }
 /// The number value can be encoded as many word sets,
 /// but decoded as one number. For partial values
 /// and dictionary words (reasonable length), we can use
 /// u64 (20-digit number), but the whole input text can
 /// be longer than 20 digits, so we operate on String (<= 255).
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct DecodedValue(String);
 impl DecodedValue {
    pub fn new(value: String) -> Result<Self, CodecError> {
        if value.len() > u8::MAX as usize {
            Err(CodecError::TextTooLong(value.len()))
        } else {
            Ok(Self(value))
        }
    }
    pub fn as_str(&self) -> &str {
        &self.0
    }
    pub fn parse(&self) -> Result<u64, ParseIntError> {
        self.0.parse()
    }
    pub fn len(&self) -> usize {
        self.0.len()
    }
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }
    pub fn value_len(&self) -> Result<DecodedLength, CodecError> {
        if self.len() == 0 {
            return Err(CodecError::EmptyValue);
        }
        DecodedLength::try_from(self.len())
    }
 }
 impl PartialEq<&str> for DecodedValue {
    fn eq(&self, other: &&str) -> bool {
        &self.0 == *other
    }
 }
 impl PartialEq<DecodedValue> for &str {
    fn eq(&self, other: &DecodedValue) -> bool {
        *self == &other.0
    }
 }
 #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
 pub struct DecodedLength(u8);
 impl DecodedLength {
    pub const fn from(value: u8) -> Self {
        Self(value)
    }
 }
 impl TryFrom<usize> for DecodedLength {
    type Error = CodecError;
    fn try_from(value: usize) -> Result<Self, CodecError> {
        if value > u8::MAX as usize {
            Err(CodecError::ValueLimitExceeded(value))
        } else {
            Ok(Self(value as u8))
        }
    }
 }
 // --- Dictionary ---
 pub type DictEntryId = u64;
 #[derive(Debug, Clone, PartialEq)]
 pub struct DictEntry {
    pub id: Option<DictEntryId>,
    pub text: String,
    pub metadata: HashMap<String, String>,
 }
 impl DictEntry {
    pub fn new(id: Option<DictEntryId>, text: String) -> Self {
        DictEntry {
            id,
            text,
            metadata: HashMap::new(),
        }
    }
 }
 #[derive(Debug, Clone)]
 pub struct Dict {
    pub name: String,
    pub entries: HashMap<DictEntryId, DictEntry>,
 }
 impl Dict {
    pub fn new(name: String) -> Self {
        Dict {
            name,
            entries: HashMap::new(),
        }
    }
    pub fn add_entry(&mut self, entry: DictEntry) {
        self.entries.insert(entry.id.unwrap(), entry);
    }
 }
--- a/lib/src/sys_major/removeme.old/core/errors.rs
+++ b/lib/src/sys_major/removeme.old/core/errors.rs
@ -0,0 +1,31 @@
 use thiserror::Error;
 #[derive(Error, Debug)]
 pub enum RepositoryError {
    #[error("Data source connection failed")]
    ConnectionFailed,
    #[error("'{0}' not found")]
    NotFound(String),
    #[error("Storage error: {0}")]
    StorageError(String),
 }
 #[derive(Debug, Error)]
 pub enum CodecError {
    #[error("text too long: {0} bytes")]
    TextTooLong(usize),
    #[error("value too large: {0}/255")]
    ValueLimitExceeded(usize),
    #[error("operation not allowed on empty value")]
    EmptyValue,
    #[error("initialization failed")]
    InitializationFailed,
    #[error("unexpected error: {0}")]
    UnexpectedError(String),
 }
--- a/lib/src/sys_major/removeme.old/core/sys_major.rs
+++ b/lib/src/sys_major/removeme.old/core/sys_major.rs
--- a/lib/src/sys_major/removeme.old/core/sys_major/decoder.rs
+++ b/lib/src/sys_major/removeme.old/core/sys_major/decoder.rs
--- a/lib/src/sys_major/removeme.old/core/sys_major/decoder_tests.rs
+++ b/lib/src/sys_major/removeme.old/core/sys_major/decoder_tests.rs
--- a/lib/src/sys_major/removeme.old/core/sys_major/encoder.rs
+++ b/lib/src/sys_major/removeme.old/core/sys_major/encoder.rs
--- a/lib/src/sys_major/removeme.old/core/sys_major/lvmap.rs
+++ b/lib/src/sys_major/removeme.old/core/sys_major/lvmap.rs
--- a/lib/src/sys_major/removeme.old/core/sys_major/rules_en.rs
+++ b/lib/src/sys_major/removeme.old/core/sys_major/rules_en.rs
--- a/lib/src/sys_major/removeme.old/core/sys_major/rules_pl.rs
+++ b/lib/src/sys_major/removeme.old/core/sys_major/rules_pl.rs
--- a/lib/src/sys_major/removeme.old/core/system.rs
+++ b/lib/src/sys_major/removeme.old/core/system.rs
--- a/lib/src/sys_major/removeme.old/core/traits.rs
+++ b/lib/src/sys_major/removeme.old/core/traits.rs
--- a/lib/src/sys_major/removeme.old/infrastructure.rs
+++ b/lib/src/sys_major/removeme.old/infrastructure.rs
--- a/lib/src/sys_major/removeme.old/infrastructure/errors.rs
+++ b/lib/src/sys_major/removeme.old/infrastructure/errors.rs
--- a/lib/src/sys_major/removeme.old/infrastructure/json_file_dict_source.rs
+++ b/lib/src/sys_major/removeme.old/infrastructure/json_file_dict_source.rs
--- a/lib/src/sys_major/removeme.old/infrastructure/sqlite_dict_repository.rs
+++ b/lib/src/sys_major/removeme.old/infrastructure/sqlite_dict_repository.rs
--- a/lib/src/sys_major/removeme.old/presentation.rs
+++ b/lib/src/sys_major/removeme.old/presentation.rs
--- a/lib/src/sys_major/rules_en.rs
+++ b/lib/src/sys_major/rules_en.rs
@ -0,0 +1,15 @@
 use super::decoder::{Rule, Rules};
 pub fn get_rules() -> Rules {
    vec![
        Rule {
            phoneme_in: "EN".to_string(),
            phoneme_out: "2".to_string(),
            not_after: vec!["Y".to_string()],
            not_before: vec!["X".to_string()],
            only_after: vec!["A".to_string()],
            only_before: vec!["C".to_string()],
        },
        // ...more entries...
    ]
 }
--- a/lib/src/sys_major/rules_pl.rs
+++ b/lib/src/sys_major/rules_pl.rs
@ -0,0 +1,222 @@
 use super::decoder::{Rule, Rules};
 pub fn get_rules() -> Rules {
    vec![
        Rule {
            not_after: vec![],
            only_after: vec![],
            phoneme_in: "S".to_string(),
            phoneme_out: "0".to_string(),
            not_before: vec!["I".to_string(), "Z".to_string()],
            only_before: vec![],
        },
        Rule {
            not_after: vec![
                "C".to_string(),
                "D".to_string(),
                "R".to_string(),
                "S".to_string(),
            ],
            only_after: vec![],
            phoneme_in: "Z".to_string(),
            phoneme_out: "0".to_string(),
            not_before: vec!["I".to_string()],
            only_before: vec![],
        },
        Rule {
            not_after: vec![],
            only_after: vec![],
            phoneme_in: "T".to_string(),
            phoneme_out: "1".to_string(),
            not_before: vec![],
            only_before: vec![],
        },
        Rule {
            only_after: vec![],
            not_after: vec![],
            phoneme_in: "D".to_string(),
            phoneme_out: "1".to_string(),
            not_before: vec!["Z".to_string(), "Ź".to_string(), "Ż".to_string()],
            only_before: vec![],
        },
        Rule {
            not_after: vec![],
            only_after: vec![],
            phoneme_in: "N".to_string(),
            phoneme_out: "2".to_string(),
            not_before: vec!["I".to_string()],
            only_before: vec![],
        },
        Rule {
            not_after: vec![],
            only_after: vec![],
            phoneme_in: "M".to_string(),
            phoneme_out: "3".to_string(),
            not_before: vec![],
            only_before: vec![],
        },
        Rule {
            not_after: vec![],
            only_after: vec![],
            phoneme_in: "R".to_string(),
            phoneme_out: "4".to_string(),
            not_before: vec!["Z".to_string()],
            only_before: vec![],
        },
        Rule {
            not_after: vec![],
            only_after: vec![],
            phoneme_in: "L".to_string(),
            phoneme_out: "5".to_string(),
            not_before: vec![],
            only_before: vec![],
        },
        Rule {
            not_after: vec![],
            only_after: vec![],
            phoneme_in: "J".to_string(),
            phoneme_out: "6".to_string(),
            not_before: vec![],
            only_before: vec![],
        },
        Rule {
            not_after: vec![],
            only_after: vec![],
            phoneme_in: "K".to_string(),
            phoneme_out: "7".to_string(),
            not_before: vec![],
            only_before: vec![],
        },
        Rule {
            not_after: vec![],
            only_after: vec![],
            phoneme_in: "G".to_string(),
            phoneme_out: "7".to_string(),
            not_before: vec![],
            only_before: vec![],
        },
        Rule {
            not_after: vec![],
            only_after: vec![],
            phoneme_in: "F".to_string(),
            phoneme_out: "8".to_string(),
            not_before: vec![],
            only_before: vec![],
        },
        Rule {
            not_after: vec![],
            only_after: vec![],
            phoneme_in: "W".to_string(),
            phoneme_out: "8".to_string(),
            not_before: vec![],
            only_before: vec![],
        },
        Rule {
            not_after: vec![],
            only_after: vec![],
            phoneme_in: "P".to_string(),
            phoneme_out: "9".to_string(),
            not_before: vec![],
            only_before: vec![],
        },
        Rule {
            not_after: vec![],
            only_after: vec![],
            phoneme_in: "B".to_string(),
            phoneme_out: "9".to_string(),
            not_before: vec![],
            only_before: vec![],
        },
    ]
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::core::sys_major::Decoder;
    use crate::traits::SystemDecoder;
    #[test]
    fn test_major_dict_pl_decode_0_1() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("SZSCZ").unwrap();
        assert_eq!(output, "0")
    }
    #[test]
    fn test_major_dict_pl_decode_0_2() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("SZSICZ").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_major_dict_pl_decode_0_3() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("SZCZRZZCZDZSZ").unwrap();
        assert_eq!(output, "0")
    }
    #[test]
    fn test_major_dict_pl_decode_0_4() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("SZCZRZZICZDZSZ").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_major_dict_pl_decode_1_1() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("SZTCZ").unwrap();
        assert_eq!(output, "1")
    }
    #[test]
    fn test_major_dict_pl_decode_1_2() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("DZDŻDŹDDZDŻDŹ").unwrap();
        assert_eq!(output, "1")
    }
    #[test]
    fn test_major_dict_pl_decode_1_3() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("DZDŻDŹDZDZDŻDŹ").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_major_dict_pl_decode_2_1() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("NINNI").unwrap();
        assert_eq!(output, "2")
    }
    #[test]
    fn test_major_dict_pl_decode_2_2() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("NININI").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_major_dict_pl_decode_4_1() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("RZRRZ").unwrap();
        assert_eq!(output, "4")
    }
    #[test]
    fn test_major_dict_pl_decode_4_2() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("RZRZRZ").unwrap();
        assert_eq!(output, "")
    }
    #[test]
    fn test_major_dict_pl_decode_full_1() {
        let decoder = Decoder::new(get_rules());
        let output = decoder.decode("ATADANAMARALAJAKAGAFAWAPABA").unwrap();
        assert_eq!(output, "1123456778899")
    }
 }
		`@ -0,0 +1,2 @@`
							`pub mod json_file_dict_source;`
							`pub mod sqlite_dict_repository;`