//! SQLite-backed implementation of the `DictRepository` trait.
use crate::common::entities::{Dict, DictEntry};
use crate::common::errors::RepositoryError;
use crate::common::traits::DictRepository;
use futures::TryStreamExt;
use futures::stream::BoxStream;
use sqlx::{Row, SqlitePool, sqlite::SqliteConnectOptions};
use std::collections::HashMap;
use std::str::FromStr;
/// Persistence-layer row shape for the `entries` table.
///
/// Field names must match the SELECTed column names exactly, since
/// `sqlx::FromRow` maps columns to fields by name.
#[derive(sqlx::FromRow)]
struct SqliteEntryDto {
// SQLite rowid; i64 is SQLite's native integer type.
id: i64,
text: String,
// sqlx reads the DB column into this specific wrapper,
// deserializing the TEXT column as JSON into a HashMap.
metadata: sqlx::types::Json<HashMap<String, String>>,
}
// Mapper: DTO -> Domain Entity
impl From<SqliteEntryDto> for DictEntry {
fn from(dto: SqliteEntryDto) -> Self {
Self {
id: Some(dto.id as u64),
text: dto.text,
// Unwrap the sqlx wrapper to get the inner HashMap
metadata: dto.metadata.0,
}
}
}
// --- REPOSITORY IMPLEMENTATION ---
/// SQLite-backed dictionary repository.
///
/// Cloning is cheap: `SqlitePool` is an internally shared handle, so all
/// clones operate on the same connection pool.
#[derive(Clone)]
pub struct SqliteDictRepository {
// Shared connection pool to the SQLite database.
pool: SqlitePool,
// Name of the currently selected dictionary; switched via `use_dict`.
dict_name: String,
}
impl SqliteDictRepository {
    /// Opens (creating if needed) the SQLite database at `database_url`,
    /// ensures the schema exists, and starts pointed at `"default_dict"`.
    ///
    /// # Errors
    /// * `RepositoryError::ConnectionFailed` — the URL is invalid or the
    ///   pool cannot connect.
    /// * `RepositoryError::StorageError` — the schema DDL fails.
    pub async fn new(database_url: &str) -> Result<Self, RepositoryError> {
        let options = SqliteConnectOptions::from_str(database_url)
            .map_err(|_| RepositoryError::ConnectionFailed)?
            .create_if_missing(true)
            // The schema below relies on ON DELETE CASCADE; enable foreign-key
            // enforcement explicitly instead of trusting the driver default.
            .foreign_keys(true);
        let pool = SqlitePool::connect_with(options)
            .await
            .map_err(|_| RepositoryError::ConnectionFailed)?;
        // Ensure tables exist with proper Normalization and Constraints
        sqlx::query(
            r#"
            CREATE TABLE IF NOT EXISTS dictionaries (
                id INTEGER PRIMARY KEY,
                name TEXT NOT NULL UNIQUE,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP
            );
            CREATE TABLE IF NOT EXISTS entries (
                id INTEGER PRIMARY KEY,
                dictionary_id INTEGER NOT NULL,
                text TEXT NOT NULL,
                metadata TEXT,
                updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY(dictionary_id) REFERENCES dictionaries(id) ON DELETE CASCADE,
                -- This constraint allows us to update existing words instead of duplicating them
                UNIQUE(dictionary_id, text)
            );
            "#,
        )
        .execute(&pool)
        .await
        .map_err(|e| RepositoryError::StorageError(e.to_string()))?;
        Ok(Self {
            // Field-init shorthand (was the redundant `pool: pool`).
            pool,
            dict_name: "default_dict".into(),
        })
    }

    /// Resolves the currently selected dictionary name to its row id.
    ///
    /// # Errors
    /// `NotFound` if no dictionary with that name exists; `StorageError`
    /// for any underlying SQL failure.
    async fn get_dict_id(&self) -> Result<i64, RepositoryError> {
        sqlx::query("SELECT id FROM dictionaries WHERE name = ?")
            .bind(&self.dict_name)
            .fetch_optional(&self.pool)
            .await
            .map_err(|e| RepositoryError::StorageError(e.to_string()))?
            .map(|row| row.get("id"))
            .ok_or_else(|| RepositoryError::NotFound(self.dict_name.clone()))
    }
}
#[async_trait::async_trait]
impl DictRepository for SqliteDictRepository {
async fn create_dict(&self) -> Result<(), RepositoryError> {
sqlx::query("INSERT OR IGNORE INTO dictionaries (name) VALUES (?)")
.bind(&self.dict_name)
.execute(&self.pool)
.await
.map_err(|e| RepositoryError::StorageError(e.to_string()))?;
Ok(())
}
fn use_dict(&mut self, name: &str) {
self.dict_name = name.to_string();
}
async fn save_entries(&self, entries: &[DictEntry]) -> Result<(), RepositoryError> {
let mut tx = self
.pool
.begin()
.await
.map_err(|_| RepositoryError::ConnectionFailed)?;
// 1. Get Dict ID
let dict_id_row = sqlx::query("SELECT id FROM dictionaries WHERE name = ?")
.bind(&self.dict_name)
.fetch_optional(&mut *tx)
.await
.map_err(|e| RepositoryError::StorageError(e.to_string()))?;
let dict_id: i64 = match dict_id_row {
Some(row) => row.get("id"),
None => return Err(RepositoryError::NotFound(self.dict_name.clone())),
};
// 2. Batch Upsert
for entry in entries {
// We must wrap the HashMap in sqlx::types::Json so SQLx knows how to serialize it
let meta_json = sqlx::types::Json(&entry.metadata);
sqlx::query(
r#"
INSERT INTO entries (dictionary_id, text, metadata)
VALUES (?, ?, ?)
ON CONFLICT(dictionary_id, text) DO UPDATE SET
metadata = excluded.metadata,
updated_at = CURRENT_TIMESTAMP
"#,
)
.bind(dict_id)
.bind(&entry.text)
.bind(meta_json)
.execute(&mut *tx)
.await
.map_err(|e| RepositoryError::StorageError(e.to_string()))?;
}
tx.commit()
.await
.map_err(|e| RepositoryError::StorageError(e.to_string()))?;
Ok(())
}
async fn fetch_many(&self, limit: usize, offset: usize) -> Result<Dict, RepositoryError> {
// Get Dict ID
let dict_id = self.get_dict_id().await?;
// Query (Reading into the DTO)
let dtos = sqlx::query_as::<_, SqliteEntryDto>(
r#"
SELECT id, text, metadata
FROM entries
WHERE dictionary_id = ?
LIMIT ? OFFSET ?
"#,
)
.bind(dict_id)
.bind(limit as u32)
.bind(offset as u32)
.fetch_all(&self.pool)
.await
.map_err(|e| RepositoryError::StorageError(e.to_string()))?;
// 4. Convert DTOs to Domain Dict
let mut entries_map = HashMap::new();
for dto in dtos {
let entry: DictEntry = dto.into(); // Converts DTO -> Entity
// We safely unwrap because the DB guarantees an ID exists
if let Some(id) = entry.id {
entries_map.insert(id, entry);
}
}
Ok(Dict {
name: self.dict_name.clone(),
entries: entries_map,
})
}
async fn stream_batches(
&self,
batch_size: usize,
) -> Result<BoxStream<'_, Result<Vec<String>, RepositoryError>>, RepositoryError> {
// 1. Resolve ID first
let dict_id = self.get_dict_id().await?;
// 2. Create the base query stream.
// We do NOT use limit/offset. We let the DB stream rows via a cursor.
let query_stream = sqlx::query("SELECT text FROM entries WHERE dictionary_id = ?")
.bind(dict_id)
.fetch(&self.pool);
// 3. Transform the stream using Functional combinators
let stream = query_stream
// Map SQLx errors to Domain errors
.map_err(|e| RepositoryError::StorageError(e.to_string()))
// Extract the String from the Row
.and_then(|row| async move {
// 'text' is the column name
let text: String = row
.try_get("text")
.map_err(|e| RepositoryError::StorageError(e.to_string()))?;
Ok(text)
})
// Group items into vectors of size `batch_size`
.try_chunks(batch_size)
// try_chunks returns a specific error type on failure, map it back
.map_err(|e| {
// logic to handle leftover elements if error occurs,
// but for simplicity, we treat stream errors as fatal here
RepositoryError::StorageError(e.to_string())
});
// 4. Box the stream to erase the complex iterator type (Type Erasure)
Ok(Box::pin(stream))
}
}