Browse Source

WIP: import/export

develop-refactor
chodak166 5 months ago
parent
commit
0be1288638
  1. 2
      app/src/app.rs
  2. 2
      lib/Cargo.toml
  3. 48
      lib/src/application/errors.rs
  4. 12
      lib/src/application/services.rs
  5. 39
      lib/src/application/traits.rs
  6. 8
      lib/src/core/entities.rs
  7. 12
      lib/src/infrastructure/errors.rs
  8. 4
      lib/src/infrastructure/json_file_dict_source.rs
  9. 405
      lib/src/infrastructure/sqlite_dict_repository.rs
  10. 2
      lib/src/presentation/cli/commands/import_dict.rs

2
app/src/app.rs

@ -19,7 +19,9 @@ impl Application {
let config = AppConfig::build(&args.global, &args.command)?; let config = AppConfig::build(&args.global, &args.command)?;
tracing_subscriber::fmt() tracing_subscriber::fmt()
.compact()
.with_env_filter(&config.log_level) .with_env_filter(&config.log_level)
.with_target(false)
.init(); .init();
debug!("Bootstrapping application..."); debug!("Bootstrapping application...");

2
lib/Cargo.toml

@ -17,4 +17,4 @@ chrono = { version = "0.4", features = ["serde"] }
thiserror = "1.0" thiserror = "1.0"
async-trait = "0.1" async-trait = "0.1"
parking_lot = "0.12" parking_lot = "0.12"
sqlx = { version = "0.7", features = ["runtime-tokio-rustls", "sqlite", "chrono", "migrate"] } sqlx = { version = "0.7", features = ["runtime-tokio", "sqlite", "chrono", "migrate"] }

48
lib/src/application/errors.rs

@ -1,15 +1,39 @@
#[derive(Debug)] // #[derive(Debug)]
// pub enum RepositoryError {
// NotFound,
// ConnectionFailed,
// InvalidData(String),
// Unexpected(String),
// }
// impl std::fmt::Display for RepositoryError {
// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// write!(f, "{:?}", self)
// }
// }
// impl std::error::Error for RepositoryError {}
use thiserror::Error;
// #[derive(Error, Debug)]
// pub enum RepositoryError {
// #[error("Database connection failed")]
// ConnectionFailed(#[source] sqlx::Error),
// #[error("Database query failed: {0}")]
// QueryFailed(#[source] sqlx::Error), //TODO: sqlx is infrastructure
// #[error("Dictionary '{0}' not found")]
// NotFound(String),
// #[error("Invalid data encountered")]
// InvalidData,
// }
#[derive(Error, Debug)]
pub enum RepositoryError { pub enum RepositoryError {
NotFound, #[error("Database connection failed")]
ConnectionFailed, ConnectionFailed,
InvalidData(String), #[error("Dictionary '{0}' not found")]
Unexpected(String), NotFound(String),
} #[error("Storage error: {0}")]
StorageError(String),
impl std::fmt::Display for RepositoryError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}", self)
}
} }
impl std::error::Error for RepositoryError {}

12
lib/src/application/services.rs

@ -13,9 +13,13 @@ impl<'a, R: DictRepository> DictImporter<'a, R> {
} }
} }
pub fn import(&self, name: &str, mut source: impl DictSource) -> Result<(), anyhow::Error> { pub async fn import(
&self,
name: &str,
mut source: impl DictSource,
) -> Result<(), anyhow::Error> {
// 1. Ensure Dict exists (Logic: Create if new, or maybe clear existing?) // 1. Ensure Dict exists (Logic: Create if new, or maybe clear existing?)
self.repo.create(name)?; self.repo.create(name).await?;
let mut batch = Vec::with_capacity(self.batch_size); let mut batch = Vec::with_capacity(self.batch_size);
@ -30,7 +34,7 @@ impl<'a, R: DictRepository> DictImporter<'a, R> {
// 3. Batch Write // 3. Batch Write
if batch.len() >= self.batch_size { if batch.len() >= self.batch_size {
self.repo.save_entries(name, &batch)?; self.repo.save_entries(name, &batch).await?;
batch.clear(); batch.clear();
} }
} }
@ -44,7 +48,7 @@ impl<'a, R: DictRepository> DictImporter<'a, R> {
// 4. Flush remaining // 4. Flush remaining
if !batch.is_empty() { if !batch.is_empty() {
self.repo.save_entries(name, &batch)?; self.repo.save_entries(name, &batch).await?;
} }
Ok(()) Ok(())

39
lib/src/application/traits.rs

@ -3,20 +3,43 @@ use crate::{
core::entities::{Dict, DictEntry}, core::entities::{Dict, DictEntry},
}; };
pub trait DictRepository { // pub trait DictRepository {
fn create(&self, name: &str) -> Result<(), RepositoryError>; // fn create(&self, name: &str) -> Result<(), RepositoryError>;
// Batch saving is usually much faster than 1-by-1 for SQL // // Batch saving is usually much faster than 1-by-1 for SQL
fn save_entries(&self, dict_name: &str, entries: &[DictEntry]) -> Result<(), RepositoryError>; // fn save_entries(&self, dict_name: &str, entries: &[DictEntry]) -> Result<(), RepositoryError>;
fn fetch_many( // fn fetch_many(
// &self,
// name: &str,
// limit: Option<u32>,
// offset: Option<u32>,
// ) -> Result<Dict, RepositoryError>;
// // Get the next available ID for a dictionary
// fn get_next_id(&self, dict_name: &str) -> Result<u32, RepositoryError>;
// }
#[async_trait::async_trait]
pub trait DictRepository: Send + Sync {
async fn create(&self, name: &str) -> Result<(), RepositoryError>;
/// "Upsert" logic:
/// - If entry exists (by text), update metadata.
/// - If not, insert new.
/// - IDs are handled by the Database.
async fn save_entries(
&self,
dict_name: &str,
entries: &[DictEntry],
) -> Result<(), RepositoryError>;
/// Fetch a page of entries.
async fn fetch_many(
&self, &self,
name: &str, name: &str,
limit: Option<u32>, limit: Option<u32>,
offset: Option<u32>, offset: Option<u32>,
) -> Result<Dict, RepositoryError>; ) -> Result<Dict, RepositoryError>;
// Get the next available ID for a dictionary
fn get_next_id(&self, dict_name: &str) -> Result<u32, RepositoryError>;
} }
pub trait DictSource { pub trait DictSource {

8
lib/src/core/entities.rs

@ -1,16 +1,16 @@
use std::collections::HashMap; use std::collections::HashMap;
pub type DictEntryId = u32; pub type DictEntryId = u64;
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub struct DictEntry { pub struct DictEntry {
pub id: DictEntryId, pub id: Option<DictEntryId>,
pub text: String, pub text: String,
pub metadata: HashMap<String, String>, pub metadata: HashMap<String, String>,
} }
impl DictEntry { impl DictEntry {
pub fn new(id: DictEntryId, text: String) -> Self { pub fn new(id: Option<DictEntryId>, text: String) -> Self {
DictEntry { DictEntry {
id, id,
text, text,
@ -33,6 +33,6 @@ impl Dict {
} }
pub fn add_entry(&mut self, entry: DictEntry) { pub fn add_entry(&mut self, entry: DictEntry) {
self.entries.insert(entry.id, entry); self.entries.insert(entry.id.unwrap(), entry);
} }
} }

12
lib/src/infrastructure/errors.rs

@ -1 +1,13 @@
// use thiserror::Error;
// #[derive(Error, Debug)]
// pub enum RepositoryError {
// #[error("Database connection failed")]
// ConnectionFailed(#[source] sqlx::Error),
// #[error("Database query failed: {0}")]
// QueryFailed(#[source] sqlx::Error),
// #[error("Dictionary '{0}' not found")]
// NotFound(String),
// #[error("Invalid data encountered")]
// InvalidData,
// }

4
lib/src/infrastructure/json_file_dict_source.rs

@ -31,7 +31,7 @@ impl JsonFileDictSource {
// Convert to DictEntry with auto-generated IDs // Convert to DictEntry with auto-generated IDs
let mut entries = Vec::new(); let mut entries = Vec::new();
for (index, json_entry) in json_entries.into_iter().enumerate() { for (index, json_entry) in json_entries.into_iter().enumerate() {
let id = (index + 1) as u32; // Auto-generate ID starting from 1 let id = (index + 1) as u64; // Auto-generate ID starting from 1
// Convert metadata from serde_json::Value to HashMap<String, String> // Convert metadata from serde_json::Value to HashMap<String, String>
let metadata = if let Some(meta) = json_entry.metadata { let metadata = if let Some(meta) = json_entry.metadata {
@ -51,7 +51,7 @@ impl JsonFileDictSource {
}; };
entries.push(DictEntry { entries.push(DictEntry {
id, id: Some(id),
text: json_entry.word, text: json_entry.word,
metadata, metadata,
}); });

405
lib/src/infrastructure/sqlite_dict_repository.rs

@ -5,6 +5,30 @@ use sqlx::{Row, SqlitePool, sqlite::SqliteConnectOptions};
use std::collections::HashMap; use std::collections::HashMap;
use std::str::FromStr; use std::str::FromStr;
// --- DTO: Data Transfer Object ---
// Row shape used when reading entries back from SQLite. This struct exists ONLY
// to talk to the database and is converted into the domain `DictEntry` afterwards.
// `FromRow` maps result columns to fields by name, so the field names must stay
// in sync with the `id`, `text` and `metadata` columns selected by the queries.
#[derive(sqlx::FromRow)]
struct SqliteEntryDto {
// Primary key of the row (`INTEGER PRIMARY KEY` in the `entries` table).
id: i64,
// The entry text as stored in the `text` column.
text: String,
// The column holds JSON text; sqlx reads the DB column into this specific
// wrapper and decodes it into a string-to-string map.
metadata: sqlx::types::Json<HashMap<String, String>>,
}
// Mapper: DTO -> Domain Entity
impl From<SqliteEntryDto> for DictEntry {
fn from(dto: SqliteEntryDto) -> Self {
Self {
id: Some(dto.id as u64),
text: dto.text,
// Unwrap the sqlx wrapper to get the inner HashMap
metadata: dto.metadata.0,
}
}
}
// --- REPOSITORY IMPLEMENTATION ---
#[derive(Clone)] #[derive(Clone)]
pub struct SqliteDictRepository { pub struct SqliteDictRepository {
pool: SqlitePool, pool: SqlitePool,
@ -20,295 +44,154 @@ impl SqliteDictRepository {
.await .await
.map_err(|_| RepositoryError::ConnectionFailed)?; .map_err(|_| RepositoryError::ConnectionFailed)?;
// Run migrations // Ensure tables exist with proper Normalization and Constraints
sqlx::migrate!("./migrations")
.run(&pool)
.await
.map_err(|e| RepositoryError::Unexpected(format!("Failed to run migrations: {}", e)))?;
Ok(Self { pool })
}
async fn ensure_dict_tables(&self, dict_name: &str) -> Result<(), RepositoryError> {
// Create dict table if not exists
sqlx::query( sqlx::query(
r#" r#"
CREATE TABLE IF NOT EXISTS dicts ( CREATE TABLE IF NOT EXISTS dictionaries (
name TEXT PRIMARY KEY, id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP created_at DATETIME DEFAULT CURRENT_TIMESTAMP
) );
"#,
)
.execute(&self.pool)
.await
.map_err(|_| RepositoryError::ConnectionFailed)?;
// Insert dict if not exists CREATE TABLE IF NOT EXISTS entries (
sqlx::query("INSERT OR IGNORE INTO dicts (name) VALUES (?)")
.bind(dict_name)
.execute(&self.pool)
.await
.map_err(|_| RepositoryError::ConnectionFailed)?;
// Create entries table for this dict
let table_name = format!("dict_entries_{}", dict_name);
let create_table_sql = format!(
r#"
CREATE TABLE IF NOT EXISTS {} (
id INTEGER PRIMARY KEY, id INTEGER PRIMARY KEY,
text TEXT NOT NULL UNIQUE, dictionary_id INTEGER NOT NULL,
text TEXT NOT NULL,
metadata TEXT, metadata TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP FOREIGN KEY(dictionary_id) REFERENCES dictionaries(id) ON DELETE CASCADE,
) -- This constraint allows us to update existing words instead of duplicating them
UNIQUE(dictionary_id, text)
);
"#, "#,
table_name )
); .execute(&pool)
.await
sqlx::query(&create_table_sql) .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
.execute(&self.pool)
.await
.map_err(|_| RepositoryError::ConnectionFailed)?;
Ok(())
}
pub fn get_next_id(&self, dict_name: &str) -> Result<u32, RepositoryError> {
let pool = self.pool.clone();
let dict_name = dict_name.to_string();
tokio::task::block_in_place(|| {
tokio::runtime::Handle::current().block_on(async move {
let table_name = format!("dict_entries_{}", dict_name);
let result: Option<i64> =
sqlx::query_scalar(&format!("SELECT MAX(id) FROM {}", table_name))
.fetch_one(&pool)
.await
.map_err(|_| RepositoryError::ConnectionFailed)?;
Ok(result.map(|id| id as u32 + 1).unwrap_or(1)) Ok(Self { pool })
})
})
} }
fn find_id_by_text(&self, dict_name: &str, text: &str) -> Result<Option<u32>, RepositoryError> { // Helper: Resolve dictionary name to ID
let pool = self.pool.clone(); async fn get_dict_id(&self, name: &str) -> Result<i64, RepositoryError> {
let dict_name = dict_name.to_string(); let row = sqlx::query("SELECT id FROM dictionaries WHERE name = ?")
let text = text.to_string(); .bind(name)
.fetch_optional(&self.pool)
tokio::task::block_in_place(|| { .await
tokio::runtime::Handle::current().block_on(async move { .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
let table_name = format!("dict_entries_{}", dict_name);
let result: Option<i64> =
sqlx::query_scalar(&format!("SELECT id FROM {} WHERE text = ?", table_name))
.bind(&text)
.fetch_one(&pool)
.await
.map_err(|_| RepositoryError::ConnectionFailed)?;
Ok(result.map(|id| id as u32)) match row {
}) Some(r) => Ok(r.get("id")),
}) None => Err(RepositoryError::NotFound(name.to_string())),
}
} }
} }
#[async_trait::async_trait]
impl DictRepository for SqliteDictRepository { impl DictRepository for SqliteDictRepository {
fn create(&self, name: &str) -> Result<(), RepositoryError> { async fn create(&self, name: &str) -> Result<(), RepositoryError> {
// This is a synchronous method, but we need to run async operations sqlx::query("INSERT OR IGNORE INTO dictionaries (name) VALUES (?)")
// In a real application, you might want to make the trait async or use a blocking executor .bind(name)
let pool = self.pool.clone(); .execute(&self.pool)
let name = name.to_string(); .await
.map_err(|e| RepositoryError::StorageError(e.to_string()))?;
// Use tokio's block_in_place to run async code in sync context Ok(())
tokio::task::block_in_place(|| {
tokio::runtime::Handle::current().block_on(async move {
let mut tx = pool.begin().await.map_err(|e| {
RepositoryError::Unexpected(format!("Failed to begin transaction: {}", e))
})?;
// Create dict table
sqlx::query(
r#"
CREATE TABLE IF NOT EXISTS dicts (
name TEXT PRIMARY KEY,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
)
"#,
)
.execute(&mut *tx)
.await
.map_err(|e| {
RepositoryError::Unexpected(format!("Failed to create dicts table: {}", e))
})?;
// Insert dict
sqlx::query("INSERT OR IGNORE INTO dicts (name) VALUES (?)")
.bind(&name)
.execute(&mut *tx)
.await
.map_err(|e| {
RepositoryError::Unexpected(format!("Failed to insert dict: {}", e))
})?;
// Create entries table for this dict
let table_name = format!("dict_entries_{}", name);
let create_table_sql = format!(
r#"
CREATE TABLE IF NOT EXISTS {} (
id INTEGER PRIMARY KEY,
text TEXT NOT NULL UNIQUE,
metadata TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
)
"#,
table_name
);
sqlx::query(&create_table_sql)
.execute(&mut *tx)
.await
.map_err(|e| {
RepositoryError::Unexpected(format!(
"Failed to create entries table: {}",
e
))
})?;
tx.commit().await.map_err(|e| {
RepositoryError::Unexpected(format!("Failed to commit transaction: {}", e))
})?;
Ok(())
})
})
} }
fn save_entries(&self, dict_name: &str, entries: &[DictEntry]) -> Result<(), RepositoryError> {
let pool = self.pool.clone();
let dict_name = dict_name.to_string();
let entries = entries.to_vec();
tokio::task::block_in_place(|| {
tokio::runtime::Handle::current().block_on(async move {
let table_name = format!("dict_entries_{}", dict_name);
for entry in entries {
let metadata_json = serde_json::to_string(&entry.metadata)
.map_err(|e| RepositoryError::InvalidData(e.to_string()))?;
// Check if entry with this text already exists
let existing_id: Option<i64> = sqlx::query_scalar(&format!(
"SELECT id FROM {} WHERE text = ?",
table_name
))
.bind(&entry.text)
.fetch_optional(&pool)
.await
.map_err(|e| RepositoryError::Unexpected(format!("Failed to check existing entry: {}", e)))?;
if let Some(id) = existing_id { async fn save_entries(
// Update existing entry &self,
sqlx::query(&format!( dict_name: &str,
"UPDATE {} SET metadata = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?", entries: &[DictEntry],
table_name ) -> Result<(), RepositoryError> {
)) let mut tx = self
.bind(metadata_json) .pool
.bind(id) .begin()
.execute(&pool) .await
.await .map_err(|_| RepositoryError::ConnectionFailed)?;
.map_err(|e| RepositoryError::Unexpected(format!("Failed to update entry: {}", e)))?;
} else {
// Insert new entry
sqlx::query(&format!(
"INSERT INTO {} (id, text, metadata) VALUES (?, ?, ?)",
table_name
))
.bind(entry.id as i64)
.bind(&entry.text)
.bind(metadata_json)
.execute(&pool)
.await
.map_err(|e| RepositoryError::Unexpected(format!("Failed to insert entry: {}", e)))?;
}
}
Ok(()) // 1. Get Dict ID
}) let dict_id_row = sqlx::query("SELECT id FROM dictionaries WHERE name = ?")
}) .bind(dict_name)
} .fetch_optional(&mut *tx)
.await
.map_err(|e| RepositoryError::StorageError(e.to_string()))?;
let dict_id: i64 = match dict_id_row {
Some(row) => row.get("id"),
None => return Err(RepositoryError::NotFound(dict_name.to_string())),
};
// 2. Batch Upsert
for entry in entries {
// We must wrap the HashMap in sqlx::types::Json so SQLx knows how to serialize it
let meta_json = sqlx::types::Json(&entry.metadata);
sqlx::query(
r#"
INSERT INTO entries (dictionary_id, text, metadata)
VALUES (?, ?, ?)
ON CONFLICT(dictionary_id, text) DO UPDATE SET
metadata = excluded.metadata,
updated_at = CURRENT_TIMESTAMP
"#,
)
.bind(dict_id)
.bind(&entry.text)
.bind(meta_json)
.execute(&mut *tx)
.await
.map_err(|e| RepositoryError::StorageError(e.to_string()))?;
}
fn get_next_id(&self, dict_name: &str) -> Result<u32, RepositoryError> { tx.commit()
self.get_next_id(dict_name) .await
.map_err(|e| RepositoryError::StorageError(e.to_string()))?;
Ok(())
} }
fn fetch_many( async fn fetch_many(
&self, &self,
name: &str, name: &str,
limit: Option<u32>, limit: Option<u32>,
offset: Option<u32>, offset: Option<u32>,
) -> Result<Dict, RepositoryError> { ) -> Result<Dict, RepositoryError> {
let pool = self.pool.clone(); // 1. Get Dict ID
let name = name.to_string(); let dict_id = self.get_dict_id(name).await?;
tokio::task::block_in_place(|| {
tokio::runtime::Handle::current().block_on(async move {
let table_name = format!("dict_entries_{}", name);
// Check if dict exists
let dict_exists: bool =
sqlx::query_scalar("SELECT EXISTS(SELECT 1 FROM dicts WHERE name = ?)")
.bind(&name)
.fetch_one(&pool)
.await
.map_err(|e| {
RepositoryError::Unexpected(format!(
"Failed to check dict exists: {}",
e
))
})?;
if !dict_exists {
return Err(RepositoryError::NotFound);
}
let mut query = format!("SELECT id, text, metadata FROM {}", table_name);
if let Some(offset_val) = offset { // 2. Prepare Limits
query.push_str(&format!(" LIMIT {}", limit.unwrap_or(1000))); let limit_val = limit.unwrap_or(1000);
query.push_str(&format!(" OFFSET {}", offset_val)); let offset_val = offset.unwrap_or(0);
} else if let Some(limit_val) = limit {
query.push_str(&format!(" LIMIT {}", limit_val));
}
let rows = sqlx::query(&query).fetch_all(&pool).await.map_err(|e| { // 3. Query (Reading into the DTO)
RepositoryError::Unexpected(format!("Failed to fetch entries: {}", e)) let dtos = sqlx::query_as::<_, SqliteEntryDto>(
})?; r#"
SELECT id, text, metadata
let mut entries = HashMap::new(); FROM entries
for row in rows { WHERE dictionary_id = ?
let id: i64 = row.get("id"); LIMIT ? OFFSET ?
let text: String = row.get("text"); "#,
let metadata_json: Option<String> = row.get("metadata"); )
.bind(dict_id)
let metadata = if let Some(json) = metadata_json { .bind(limit_val)
serde_json::from_str(&json) .bind(offset_val)
.map_err(|e| RepositoryError::InvalidData(e.to_string()))? .fetch_all(&self.pool)
} else { .await
HashMap::new() .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
};
// 4. Convert DTOs to Domain Dict
let entry = DictEntry { let mut entries_map = HashMap::new();
id: id as DictEntryId, for dto in dtos {
text, let entry: DictEntry = dto.into(); // Converts DTO -> Entity
metadata,
}; // No unwrap is needed: the DTO mapper always sets the id, so this check never skips a row
if let Some(id) = entry.id {
entries.insert(entry.id, entry); entries_map.insert(id, entry);
} }
}
Ok(Dict { name, entries })
}) Ok(Dict {
name: name.to_string(),
entries: entries_map,
}) })
} }
} }

2
lib/src/presentation/cli/commands/import_dict.rs

@ -21,7 +21,7 @@ pub async fn run<R: DictRepository>(
let importer = DictImporter::new(&repository); let importer = DictImporter::new(&repository);
// Perform the import (this will call create() first) // Perform the import (this will call create() first)
match importer.import(&config.name, source) { match importer.import(&config.name, source).await {
Ok(()) => { Ok(()) => {
info!("Successfully imported dictionary '{}'", config.name); info!("Successfully imported dictionary '{}'", config.name);
Ok(()) Ok(())

Loading…
Cancel
Save