From dd1b3ddc998cd722d8e15c5475f106de54784db3 Mon Sep 17 00:00:00 2001
From: chodak166 <chodak166@op.pl>
Date: Fri, 26 Dec 2025 18:40:30 +0100
Subject: [PATCH] WIP: encoder

---
 lib/src/core/entities.rs          |  15 +++++
 lib/src/core/sys_major/encoder.rs | 105 +++++++++++++++++++++++++++++-
 lib/src/core/sys_major/lvmap.rs   |  16 ++++-
 3 files changed, 134 insertions(+), 2 deletions(-)
diff --git a/lib/src/core/entities.rs b/lib/src/core/entities.rs
index 141284b..68e38bd 100644
--- a/lib/src/core/entities.rs
+++ b/lib/src/core/entities.rs
@@ -1,11 +1,26 @@
 use super::errors::CodecError;
 use std::num::ParseIntError;
+use std::ops::Deref;
 use std::{collections::HashMap, u64};
 
 /// A number encoded as a sequence of words
 #[derive(Debug, Clone)]
 pub struct EncodedValue(Vec<String>);
 
+impl EncodedValue {
+    pub fn new(data: Vec<String>) -> Self {
+        Self(data) // takes ownership of the words as-is: no copy, no validation
+    }
+}
+
+impl Deref for EncodedValue {
+    type Target = Vec<String>;
+    /// Borrows the wrapped word list so its read-only methods work on the wrapper.
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
 /// The number value can be encoded as many word sets,
 /// but decoded as one number. For partial values
 /// and dictionary words (reasonable length), we can use
diff --git a/lib/src/core/sys_major/encoder.rs b/lib/src/core/sys_major/encoder.rs
index fa1c0ef..fbe18f5 100644
--- a/lib/src/core/sys_major/encoder.rs
+++ b/lib/src/core/sys_major/encoder.rs
@@ -16,6 +16,109 @@ impl SystemEncoder for Encoder {
         Ok(())
     }
     fn encode(&self, word: &str) -> Result<EncodedValue, CodecError> {
-        todo!()
+        // WIP: enumerates every split of `word` into contiguous parts; the parts
+        // are only joined with `|` here, nothing is looked up in a dictionary.
+        let size = word.chars().count();
+        if size == 0 {
+            // No characters, no splits; also keeps `size - 1` below from underflowing.
+            return Ok(EncodedValue::new(Vec::new()));
+        }
+        // One bit per gap between adjacent characters, i.e. 2^(size - 1) masks.
+        // NOTE(review): exponential in `size`; very long words exhaust memory.
+        let max_mask: usize = (1 << (size - 1)) - 1;
+
+        let indices: Vec<usize> = word.char_indices().map(|(i, _)| i).collect();
+        let mut results = Vec::with_capacity(max_mask + 1); // one slot per mask in 0..=max_mask
+
+        for mask in 0..=max_mask {
+            let mut parts = Vec::new();
+            let mut last_split = 0;
+            // Bit i set means a cut between character i and character i + 1.
+            for i in 0..size - 1 {
+                if (mask >> i) & 1 == 1 {
+                    // Cut at the byte offset of character i + 1 so slices stay on char boundaries
+                    let split_idx = indices[i + 1];
+                    parts.push(&word[last_split..split_idx]);
+                    last_split = split_idx;
+                }
+            }
+            // Push the remaining part of the string
+            parts.push(&word[last_split..]);
+
+            // Sort keys: the part count, then the sum of squared part lengths (for a
+            // fixed part count a smaller sum means more evenly sized parts, no floats needed).
+            let num_parts = parts.len();
+            let sum_sq_len: usize = parts.iter().map(|p| p.chars().count().pow(2)).sum();
+            results.push(Partition {
+                // String form of the split, e.g. "abc|de|fg"
+                word: parts.join("|"),
+                num_parts,
+                sum_sq_len,
+            });
+        }
+
+        // Sort by:
+        // 1. Fewer parts first (1 part, then 2 parts...)
+        // 2. Most equal lengths (lower sum of squared lengths is more balanced)
+        // 3. Lexicographically (for deterministic stability)
+        results.sort_by(|a, b| {
+            a.num_parts
+                .cmp(&b.num_parts)
+                .then(a.sum_sq_len.cmp(&b.sum_sq_len))
+                .then(a.word.cmp(&b.word))
+        });
+
+        // Extract just the strings
+        let words = results.into_iter().map(|p| p.word).collect();
+        Ok(EncodedValue::new(words))
+    }
+}
+
+/// One way of splitting the input, bundled with the keys used to rank it.
+struct Partition {
+    word: String, // the parts joined with `|`, e.g. "abc|de|fg"
+    num_parts: usize, // how many parts the split has
+    sum_sq_len: usize, // sum of squared part lengths, counted in chars
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    // NOTE(review): these expect dictionary words, but `encode` currently returns raw splits.
+    #[test]
+    fn test_encode_as_single_length_result() {
+        let mut lvmap = LenValueMap::new();
+        lvmap.push(3, 123, "test_123");
+        lvmap.push(3, 345, "test_345_1");
+        lvmap.push(3, 345, "test_345_2");
+        lvmap.push(3, 678, "test_678");
+        let encoder = Encoder::new(lvmap);
+        let result = encoder.encode("345").unwrap();
+
+        assert_eq!(result.len(), 2);
+        assert!(result.contains(&"test_345_1".into()));
+        assert!(result.contains(&"test_345_2".into()));
+    }
+
+    #[test]
+    fn test_encode_as_all_lengths() {
+        let mut lvmap = LenValueMap::new();
+        lvmap.push(1, 0, "test_0");
+        lvmap.push(1, 9, "test_9");
+        lvmap.push(1, 8, "test_8");
+        lvmap.push(2, 98, "test_98");
+        lvmap.push(2, 87, "test_87");
+        lvmap.push(3, 987, "test_987");
+        lvmap.push(3, 876, "test_876");
+
+        let encoder = Encoder::new(lvmap);
+        let result = encoder.encode("987").unwrap();
+
+        assert_eq!(result.len(), 5);
+        assert!(result.contains(&"test_987".into()));
+        assert!(result.contains(&"test_98".into()));
+        assert!(result.contains(&"test_87".into()));
+        assert!(result.contains(&"test_9".into()));
+        assert!(result.contains(&"test_8".into()));
     }
 }
diff --git a/lib/src/core/sys_major/lvmap.rs b/lib/src/core/sys_major/lvmap.rs
index 2ec83fc..e9ed4c1 100644
--- a/lib/src/core/sys_major/lvmap.rs
+++ b/lib/src/core/sys_major/lvmap.rs
@@ -1,5 +1,5 @@
 use crate::core::{DictRepository, SystemDecoder, entities::DecodedLength, errors::CodecError};
-use std::{collections::HashMap, num::ParseIntError};
+use std::{collections::HashMap, hash::Hash, num::ParseIntError};
 use thiserror::Error;
 
 // We store words by encoded number length, then encoded value
@@ -51,6 +51,16 @@ impl LenValueMap {
         self.data
     }
 
+    /// Appends `word` to the words stored under the (`len`, `num`) key pair,
+    /// creating the intermediate map and list entries on first use.
+    /// Returns `&mut self` so calls can be chained.
+    pub fn push(&mut self, len: u8, num: DecodedNumber, word: impl Into<String>) -> &mut Self {
+        let by_value = self.data.entry(DecodedLength::from(len)).or_default();
+        // `or_default` creates the same empty collections the explicit constructors would.
+        by_value.entry(num).or_default().push(word.into());
+        self
+    }
+
     pub fn insert_words<I>(
         &mut self,
         words: I,
@@ -75,6 +85,10 @@ impl LenValueMap {
         Ok(())
     }
 
+    pub fn from_data(data: LenValueData) -> Self {
+        Self { data } // adopts the prebuilt map as-is: no copy, no validation
+    }
+
     pub async fn from_dict(
         decoder: &impl SystemDecoder,
         repo: &impl DictRepository,