bee-san
diff --git a/‎Cargo.lock‎
Lines changed: 105 additions & 106 deletions b/‎Cargo.lock‎
Lines changed: 105 additions & 106 deletions
diff --git a/‎Cargo.toml‎
Lines changed: 2 additions & 2 deletions b/‎Cargo.toml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎docs/changes/2024-07-10-astar-refactor.md‎
Lines changed: 28 additions & 0 deletions b/‎docs/changes/2024-07-10-astar-refactor.md‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎docs/changes/2024-07-10-improve-string-pruning.md‎
Lines changed: 30 additions & 0 deletions b/‎docs/changes/2024-07-10-improve-string-pruning.md‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎docs/changes/2024-07-10-remove-cipher-mapping.md‎
Lines changed: 24 additions & 0 deletions b/‎docs/changes/2024-07-10-remove-cipher-mapping.md‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎docs/changes/2024-07-10-remove-decoder-popularity.md‎
Lines changed: 25 additions & 0 deletions b/‎docs/changes/2024-07-10-remove-decoder-popularity.md‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎docs/rules/documentation.mdc‎
Lines changed: 147 additions & 1 deletion b/‎docs/rules/documentation.mdc‎
Lines changed: 147 additions & 1 deletion
diff --git a/‎src/checkers/athena.rs‎
Lines changed: 2 additions & 0 deletions b/‎src/checkers/athena.rs‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/checkers/english.rs‎
Lines changed: 0 additions & 10 deletions b/‎src/checkers/english.rs‎
Lines changed: 0 additions & 10 deletions
diff --git a/‎src/checkers/lemmeknow_checker.rs‎
Lines changed: 53 additions & 0 deletions b/‎src/checkers/lemmeknow_checker.rs‎
Lines changed: 53 additions & 0 deletions
@@ -1,7 +1,7 @@
 [package]
 name = "project_ares"
 repository = "https://github.com/bee-san/Ares"
-version = "0.10.0"
+version = "0.11.0"
 edition = "2021"
 description = "Automated decoding tool, Ciphey but in Rust"
 license = "MIT"
@@ -48,7 +48,7 @@ bs58 = "0.5.0"
 data-encoding = "2.4.0"
 urlencoding = "2.1.3"
 z85 = "3.0.5"
-gibberish-or-not = "4.0.3"
+gibberish-or-not = "4.1.0"
 cipher_identifier = "0.2.0"
 rand = "0.9.0"  # For generating random values
 colored = "3.0.0"
 
@@ -0,0 +1,28 @@
+# Change: AStar Refactoring and String Quality Enhancement
+
+## Purpose
+Refactor the AStar search implementation to improve code organization and enhance string quality assessment by filtering out strings with high percentages of invisible characters.
+
+## Trade-offs
+### Advantages
+- Improved code organization with helper functions in a separate module
+- Better memory efficiency by quickly rejecting strings with >50% invisible characters
+- Enhanced maintainability through clearer separation of concerns
+- Easier testing of individual helper functions
+
+### Disadvantages
+- Slight increase in module complexity with an additional file
+- Potential for minor performance overhead from cross-module function calls
+
+## Technical Implementation
+- Split AStar implementation into two files:
+  - `astar.rs`: Core A* search algorithm implementation
+  - `helper_functions.rs`: Supporting functions for heuristics, quality assessment, and statistics
+- Enhanced `calculate_string_quality` function to immediately reject strings with >50% invisible characters
+- Added a new test case to verify the invisible character filtering functionality
+- Updated module imports and exports in `mod.rs`
+
+## Future Improvements
+- Persist decoder success statistics to disk for learning across sessions
+- Further optimize string quality assessment with more sophisticated language detection
+- Consider moving more common utility functions to the helper module for reuse by other search algorithms 
@@ -0,0 +1,30 @@
+# Change: Improve String Pruning for Low-Quality Inputs
+
+## Purpose
+Enhance the pruning mechanism to skip decoding of low-quality strings, which improves efficiency by avoiding wasted computation on strings that are unlikely to produce meaningful results.
+
+## Trade-offs
+### Advantages
+- Reduces computational resources spent on strings unlikely to yield useful results
+- Speeds up the overall decoding process by focusing on higher-quality candidates
+- Prevents the search algorithm from exploring unproductive paths
+- Improves memory usage by pruning low-quality strings early
+
+### Disadvantages
+- May occasionally reject valid encodings that have unusual characteristics
+- Requires careful tuning of thresholds to balance efficiency and thoroughness
+- Adds extra computation for quality checks (though this is minimal compared to the savings)
+
+## Technical Implementation
+- Enhanced the `check_if_string_cant_be_decoded` function to consider multiple quality factors:
+  - String length (rejects strings with 2 or fewer characters)
+  - Non-printable character ratio (rejects strings with >30% non-printable characters)
+  - Overall string quality (rejects strings with quality score <0.2)
+- Added comprehensive tests to verify the pruning behavior
+- Updated documentation to explain the rationale behind each pruning criterion
+
+## Future Improvements
+- Fine-tune the thresholds based on real-world usage data
+- Consider adding more sophisticated quality metrics (e.g., entropy, character distribution)
+- Implement adaptive thresholds that adjust based on the search context
+- Add logging to track how many strings are being pruned and why 
@@ -0,0 +1,24 @@
+# Change: Remove CIPHER_MAPPING from helper_functions
+
+## Purpose
+Remove the incorrect mapping between Cipher Identifier's cipher names and Ares decoder names. The mapping was inaccurate, particularly with "fractionatedMorse" being incorrectly mapped to "morseCode" when they are different encoding schemes.
+
+## Trade-offs
+### Advantages
+- Removes incorrect mappings that could lead to misidentification of ciphers
+- Simplifies the code by directly using the first result from Cipher Identifier
+- Eliminates potential confusion between different cipher types
+
+### Disadvantages
+- No longer filters cipher types based on available decoders
+- May return cipher types that don't have corresponding decoders in Ares
+
+## Technical Implementation
+- Removed the `CIPHER_MAPPING` static variable and its documentation
+- Modified the `get_cipher_identifier_score` function to return the first result from Cipher Identifier instead of checking against the mapping
+- Verified that all tests still pass after the changes
+
+## Future Improvements
+- Consider implementing a more accurate mapping if needed in the future
+- Potentially add a check to verify if Ares has a decoder for the identified cipher type
+- Could add a more sophisticated scoring mechanism for cipher identification 
@@ -0,0 +1,25 @@
+# Change: Remove get_decoder_popularity Function
+
+## Purpose
+Remove the redundant `get_decoder_popularity` function from `helper_functions.rs` since decoders already have a `popularity` attribute in their implementation. This eliminates duplication and ensures that popularity values are maintained in a single location.
+
+## Trade-offs
+### Advantages
+- Eliminates redundant code that duplicated popularity values
+- Simplifies maintenance by having popularity values defined only in the decoder implementations
+- Reduces the risk of inconsistencies between the function and the actual decoder attributes
+
+### Disadvantages
+- The `generate_heuristic` function no longer has direct access to the popularity values
+- Using success rate as a proxy for popularity may not perfectly match the original behavior
+
+## Technical Implementation
+- Removed the `get_decoder_popularity` function from `helper_functions.rs`
+- Modified the `generate_heuristic` function to use the decoder's success rate as a proxy for popularity
+- Updated tests to verify that success rate affects the heuristic calculation
+- Removed the now-obsolete `test_popularity_affects_heuristic` test
+
+## Future Improvements
+- Consider modifying the `CrackResult` struct to include the decoder's popularity attribute
+- Explore ways to directly access the decoder's popularity attribute in the `generate_heuristic` function
+- Evaluate whether success rate is an appropriate proxy for popularity or if another approach would be better 
@@ -1 +1,147 @@
- 
+# Documentation Standards
+
+Rule to ensure consistent documentation across the codebase.
+
+<rule>
+name: documentation_standards
+description: Standards for code documentation and comments
+filters:
+  # Match any Rust files
+  - type: file_extension
+    pattern: "\\.rs$"
+  # Match documentation comments
+  - type: content
+    pattern: "///|//!|#\\[doc"
+
+actions:
+  - type: suggest
+    message: |
+      When writing documentation:
+
+      1. Module-Level Documentation:
+         ```rust
+         //! Module description that explains its purpose
+         //! 
+         //! Detailed explanation of what the module handles, its key
+         //! components, and any important concepts.
+         ```
+
+      2. Struct Documentation:
+         ```rust
+         /// Represents a complex type with a clear purpose.
+         /// Each field is documented to explain its role and format.
+         #[derive(Debug)]  // Add debug when struct should be printable
+         pub struct MyStruct {
+             /// Field description explaining its purpose
+             /// Format specification if applicable (e.g. "r,g,b" format)
+             pub field: Type,
+         }
+         ```
+
+      3. Function Documentation:
+         ```rust
+         /// Clear description of what the function does.
+         /// 
+         /// # Arguments
+         /// * `arg` - Detailed description of the argument
+         /// 
+         /// # Returns
+         /// * `ReturnType` - Description of the return value
+         /// 
+         /// # Panics
+         /// Document any conditions that cause panics
+         pub fn my_function(arg: Type) -> ReturnType {
+         ```
+
+      4. Enum Documentation:
+         ```rust
+         /// Description of what the enum represents
+         /// Include any important usage information
+         #[derive(Debug)]
+         pub enum MyEnum {
+             /// Description of this variant
+             /// Include any specific behavior
+             Variant1,
+         }
+         ```
+
+      5. Error Handling Documentation:
+         ```rust
+         /// Function that can fail
+         /// 
+         /// # Errors
+         /// * Describes conditions that cause errors
+         /// * Lists the types of errors that can occur
+         pub fn fallible_function() -> Result<T, Error> {
+         ```
+
+      6. Constants and Configuration:
+         ```rust
+         /// Description of what the constant represents
+         /// Include units or format if applicable
+         pub const MY_CONSTANT: f64 = 0.3;
+         ```
+
+      7. Public API Documentation:
+         ```rust
+         /// Public function with complete documentation
+         /// 
+         /// Detailed description of the function's purpose,
+         /// behavior, and any important notes about usage.
+         /// 
+         /// # Examples
+         /// ```rust
+         /// let result = my_function("input");
+         /// assert_eq!(result, "expected");
+         /// ```
+         pub fn my_function(input: &str) -> &str {
+         ```
+
+      8. Internal Function Documentation:
+         ```rust
+         /// Brief description of internal function
+         /// Focus on technical details relevant to maintainers
+         fn internal_function() {
+         ```
+
+examples:
+  - input: |
+      //! Color handling module for CLI output
+      //! 
+      //! This module manages color schemes and formatting for
+      //! CLI output, ensuring consistent appearance across the app.
+      
+      /// Represents a color scheme with RGB values for different roles.
+      /// Each color is stored as a comma-separated RGB string.
+      #[derive(Debug)]
+      pub struct ColorScheme {
+          /// RGB color for informational messages (format: "r,g,b")
+          pub informational: String,
+      }
+      
+      /// Formats text with specified color scheme
+      /// 
+      /// # Arguments
+      /// * `text` - The text to color
+      /// * `role` - The role determining color choice
+      /// 
+      /// # Returns
+      /// * `String` - Colored text string
+      pub fn format_text(text: &str, role: &str) -> String {
+    output: "Valid documentation format"
+
+  - input: |
+      // Bad documentation
+      struct Colors {
+          // RGB color
+          info: String,
+      }
+      
+      // Colors the text
+      fn color_text(t: &str) -> String {
+    output: "Invalid documentation format"
+
+metadata:
+  priority: high
+  version: 1.0
+</rule>
@@ -55,7 +55,9 @@ impl Check for Checker<Athena> {
             // TODO: wrap all checkers in oncecell so we only create them once!
             let lemmeknow = Checker::<LemmeKnow>::new().with_sensitivity(self.sensitivity);
             let lemmeknow_result = lemmeknow.check(text);
+            //println!("Text is {}", text);
             if lemmeknow_result.is_identified {
+                println!("lemmeknow_result: {:?}", lemmeknow_result.is_identified);
                 let mut check_res = CheckResult::new(&lemmeknow);
                 let human_result = human_checker::human_checker(&lemmeknow_result);
                 check_res.is_identified = human_result;
 
@@ -139,16 +139,6 @@ mod tests {
         assert!(checker.check("Prei?nterview He!llo Dog?").is_identified);
     }
 
-    #[test]
-    fn test_checker_fails_doesnt_hit_40_percent() {
-        let checker = Checker::<EnglishChecker>::new();
-        assert!(
-            checker
-                .check("Hello Dog nnnnnnnnnnn llllllll ppppppppp gggggggg")
-                .is_identified
-        );
-    }
-
     #[test]
     fn test_check_fail_single_puncuation_char() {
         let checker = Checker::<EnglishChecker>::new();
 
@@ -58,3 +58,56 @@ impl Check for Checker<LemmeKnow> {
 fn format_data_result(input: &Data) -> String {
     input.name.to_string()
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::checkers::checker_type::{Check, Checker};
+    use gibberish_or_not::Sensitivity;
+
+    #[test]
+    fn test_url_exact_match() {
+        let checker = Checker::<LemmeKnow>::new().with_sensitivity(Sensitivity::Low);
+        assert!(checker.check("https://google.com").is_identified);
+    }
+
+    #[test]
+    fn test_url_with_extra_text_fails() {
+        let checker = Checker::<LemmeKnow>::new().with_sensitivity(Sensitivity::Low);
+        assert!(
+            !checker
+                .check("https://google.com and some text")
+                .is_identified
+        );
+    }
+
+    #[test]
+    fn test_ip_exact_match() {
+        let checker = Checker::<LemmeKnow>::new().with_sensitivity(Sensitivity::Low);
+        assert!(checker.check("192.168.1.1").is_identified);
+    }
+
+    #[test]
+    fn test_ip_with_extra_text_fails() {
+        let checker = Checker::<LemmeKnow>::new().with_sensitivity(Sensitivity::Low);
+        assert!(!checker.check("IP is 192.168.1.1").is_identified);
+    }
+
+    #[test]
+    fn test_s3_path() {
+        let checker = Checker::<LemmeKnow>::new().with_sensitivity(Sensitivity::Low);
+        assert!(checker.check("s3://bucket/path/key").is_identified);
+    }
+
+    // Lemmeknow can only match if it's an EXACT match,
+    // so an address surrounded by extra text should not be identified
+    #[test]
+    fn test_bitcoin_with_extra_text_fails() {
+        let checker = Checker::<LemmeKnow>::new().with_sensitivity(Sensitivity::Low);
+        assert!(
+            !checker
+                .check("BTC address: 1BvBMSEYstWetqTFn5Au4m4GFg7xJaNVN2")
+                .is_identified
+        );
+    }
+}