
Commit d792010

Merge branch 'dataforgoodfr:main' into main
2 parents 26f9468 + 0476ca3 commit d792010


59 files changed: +3238 −1506 lines

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -172,3 +172,4 @@ kotaemon-custom/kotaemon/ktem_app_data
 
 # secret files
 secret*
+outputs/

Makefile

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+.PHONY: help install dev test lint format clean run deploy
+
+help:
+	@echo "Available commands:"
+	@echo "  make install  - Install dependencies"
+	@echo "  make dev      - Install dev dependencies"
+	@echo "  make test     - Run tests"
+	@echo "  make lint     - Run linters"
+	@echo "  make format   - Format code"
+	@echo "  make clean    - Clean cache files"
+
+install:
+	uv sync
+
+dev:
+	uv pip install -e ".[dev]"
+
+test:
+	uv run pytest tests/ -v
+
+test-cov:
+	pytest --cov=src/bia_backend --cov-report=html
+
+lint:
+	uv tool run ruff check src/ tests/
+	uv tool run mypy src/
+
+format:
+	uv tool run ruff format src/ tests/ rag_system/pipeline_scripts
+	uv tool run isort src/ tests/ rag_system/pipeline_scripts
+
+
+clean:
+	find . -type d -name __pycache__ -exec rm -rf {} +
+	find . -type f -name "*.pyc" -delete
+	rm -rf .pytest_cache .coverage htmlcov .mypy_cache .ruff_cache

README.md

Lines changed: 10 additions & 0 deletions
@@ -70,5 +70,15 @@ Plus d'informations : [documentation officielle de uv](https://astral.sh/uv)
 tox -vv
 
 
+## Roadmap
+
+- [ ] Reduce the requirements in `rag_system`
+- [ ] Merge `rag_system` and `src` into a single folder
+- [ ] Add unit tests
+- [ ] Add integration tests
+- [ ] Improve the documentation
+- [ ] Improve the extraction of sufficiency ("sobriété") policies
+
+
 > [!IMPORTANT]
 > Project under active development; no guarantee of functionality, especially for the tests.

rag_system/pipeline_scripts/agentic_data_policies_extraction/README_MULTI_AGENT.md renamed to docs/getting-started/README_MULTI_AGENT.md

Lines changed: 8 additions & 7 deletions
@@ -33,7 +33,7 @@ The system consists of 8 specialized agents:
 ### Basic Usage
 
 ```python
-from AI.handlers.enhanced_main_handler import get_enhanced_handler
+from .handlers.enhanced_main_handler import get_enhanced_handler
 
 # Initialize with multi-agent support
 handler = get_enhanced_handler(use_agents=True)
@@ -54,7 +54,7 @@ print(comparison)
 ### Step-by-Step Process
 
 ```python
-from AI.handlers.agent_orchestrator import get_agent_orchestrator
+from .handlers.agent_orchestrator import get_agent_orchestrator
 
 orchestrator = get_agent_orchestrator()
 
@@ -119,13 +119,14 @@ The system maintains the same JSON output format as the original prompt:
 ### Test with Sample Data
 
 ```bash
-python example_usage.py
+cd rag_system/pipeline_scripts
+uv run python -m agentic_data_policies_extraction.example_usage
 ```
 
 ### Test with PDF Files
 
 ```bash
-python main.py
+uv run python -m agentic_data_policies_extraction.main
 ```
 
 ### Production Mode
@@ -200,15 +201,15 @@ The system is designed to be a drop-in replacement for the original single promp
 
 ```python
 # Old way
-from AI.prompts.text_analyzer import get_prompt_extraction
-from AI.handlers.main_handler import get_client, get_response
+from .prompts.text_analyzer import get_prompt_extraction
+from .handlers.main_handler import get_client, get_response
 
 client = get_client()
 prompt = get_prompt_extraction(conclusion_text)
 response = get_response(client, prompt)
 
 # New way
-from AI.handlers.enhanced_main_handler import get_enhanced_handler
+from .handlers.enhanced_main_handler import get_enhanced_handler
 
 handler = get_enhanced_handler(use_agents=True)
 result = handler.extract_data(conclusion_text, method="agents")
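The hunks above switch the package from absolute `AI.*` imports to relative `.` imports, and the docs from `python main.py` to `python -m agentic_data_policies_extraction.main`. The two changes go together: a module containing relative imports cannot be run as a standalone script, only via `-m` from the package's parent directory. A small self-contained demonstration of that constraint (throwaway package names are illustrative):

```python
import pathlib
import subprocess
import sys
import tempfile

# Build a throwaway package whose entry module uses a relative import,
# mirroring the commit's `from .handlers...` style.
root = pathlib.Path(tempfile.mkdtemp())
pkg = root / "extraction_demo"
pkg.mkdir()
(pkg / "__init__.py").write_text("")
(pkg / "handlers.py").write_text("def get_handler():\n    return 'handler ready'\n")
(pkg / "main.py").write_text(
    "from .handlers import get_handler\n"  # relative import, as in the diff
    "print(get_handler())\n"
)

# Running the file directly fails: relative imports need a package context.
direct = subprocess.run(
    [sys.executable, str(pkg / "main.py")], capture_output=True, text=True
)
assert direct.returncode != 0  # "attempted relative import with no known parent package"

# Running with -m from the parent directory works, which is why the docs
# now say `uv run python -m agentic_data_policies_extraction.main`.
as_module = subprocess.run(
    [sys.executable, "-m", "extraction_demo.main"],
    cwd=root, capture_output=True, text=True,
)
print(as_module.stdout.strip())
```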

rag_system/README.md

Lines changed: 10 additions & 0 deletions
@@ -33,6 +33,16 @@ uv sync
 You can find a detailed guide here: [📄](../rag_system/pipeline_scripts/agentic_data_policies_extraction/policies_transformation_to_matrices/README.md)
 
 
+## Running the RAG System
+
+We recommend running as a Python module, or using the Docker Compose file:
+
+```bash
+cd rag_system/pipeline_scripts
+uv run python -m agentic_data_policies_extraction.main
+```
+
+
 ## Kotaemon Subtree Setup
 
 The Kotaemon folder is a shared Data4Good subtree, synchronized with the common project:

rag_system/pipeline_scripts/agentic_data_policies_extraction/__init__.py

Whitespace-only changes.

rag_system/pipeline_scripts/agentic_data_policies_extraction/clients/__init__.py

Whitespace-only changes.
Lines changed: 67 additions & 54 deletions
@@ -1,49 +1,53 @@
+import logging
 import os
-from sqlmodel import SQLModel, create_engine, Session
+from typing import Any, Dict, List, Tuple, Union
+
 from sqlalchemy import text
-from typing import List, Dict, Any, Union, Tuple
-import logging
+from sqlmodel import Session, create_engine
 
 # Configure logging
 logger = logging.getLogger(__name__)
 
 # Database configuration - same as persist_taxonomy.py
 DATABASE_URL = os.getenv(
     "DATABASE_URL",
-    "postgresql://u4axloluqibskgvdikuy:g2rXgpHSbztokCbFxSyR@bk8htvifqendwt1wlzat-postgresql.services.clever-cloud.com:7327/bk8htvifqendwt1wlzat"
+    "postgresql://u4axloluqibskgvdikuy:g2rXgpHSbztokCbFxSyR@bk8htvifqendwt1wlzat-postgresql.services.clever-cloud.com:7327/bk8htvifqendwt1wlzat",
 )
 
 # Create database engine - same pattern as persist_taxonomy.py
 db_engine = create_engine(DATABASE_URL, pool_pre_ping=True)
 
+
 class DatabaseClient:
     """SQL client for connecting to the main PostgreSQL database using SQLModel/SQLAlchemy"""
-
+
     def __init__(self, engine=None):
         """
         Initialize database client
-
+
         Args:
             engine: SQLAlchemy engine. If None, uses the default engine.
         """
         self.engine = engine or db_engine
-
-    def execute_query(self, query: str, params: Union[Dict, Tuple, None] = None) -> List[Dict[str, Any]]:
+
+    def execute_query(
+        self, query: str, params: Union[Dict, Tuple, None] = None
+    ) -> List[Dict[str, Any]]:
         """
         Execute a SQL query and return results as a list of dictionaries
-
+
         Args:
             query: SQL query string
             params: Query parameters (dict for named params, tuple for positional)
-
+
         Returns:
             List of dictionaries containing query results
         """
         try:
             with Session(self.engine) as session:
                 # Execute the query using SQLAlchemy text()
                 result = session.execute(text(query), params or {})
-
+
                 # Convert results to list of dictionaries
                 if result.returns_rows:
                     columns = result.keys()
@@ -53,11 +57,11 @@ def execute_query(self, query: str, params: Union[Dict, Tuple, None] = None) ->
         except Exception as e:
             logger.error(f"Error executing query: {e}")
             raise
-
+
     def list_tables(self) -> List[str]:
         """
         List all tables in the database
-
+
         Returns:
             List of table names
         """
@@ -67,17 +71,17 @@ def list_tables(self) -> List[str]:
             WHERE table_schema = 'public'
             ORDER BY table_name;
         """
-
+
         result = self.execute_query(query)
-        return [row['table_name'] for row in result]
+        return [row["table_name"] for row in result]
 
     def get_table_info(self, table_name: str) -> Dict[str, Any]:
         """
         Get information about a table structure
-
+
         Args:
             table_name: Name of the table
-
+
         Returns:
             Dictionary containing table information
         """
@@ -91,29 +95,27 @@ def get_table_info(self, table_name: str) -> Dict[str, Any]:
             WHERE table_name = :table_name
             ORDER BY ordinal_position;
         """
-
+
         columns = self.execute_query(query, {"table_name": table_name})
-
+
         # Get row count
         count_query = f"SELECT COUNT(*) as count FROM {table_name}"
         count_result = self.execute_query(count_query)
-        row_count = count_result[0]['count'] if count_result else 0
-
-        return {
-            'table_name': table_name,
-            'columns': columns,
-            'row_count': row_count
-        }
-
-    def query_table(self, table_name: str, limit: int = 10, offset: int = 0) -> List[Dict[str, Any]]:
+        row_count = count_result[0]["count"] if count_result else 0
+
+        return {"table_name": table_name, "columns": columns, "row_count": row_count}
+
+    def query_table(
+        self, table_name: str, limit: int = 10, offset: int = 0
+    ) -> List[Dict[str, Any]]:
         """
         Query any table in the database
-
+
         Args:
             table_name: Name of the table to query
             limit: Number of rows to return
             offset: Number of rows to skip
-
+
         Returns:
             List of dictionaries containing table data
         """
@@ -122,50 +124,59 @@ def query_table(self, table_name: str, limit: int = 10, offset: int = 0) -> List
             ORDER BY 1
             LIMIT :limit OFFSET :offset
         """
-
+
         return self.execute_query(query, {"limit": limit, "offset": offset})
-
-    def search_table(self, table_name: str, search_term: str, limit: int = 10) -> List[Dict[str, Any]]:
+
+    def search_table(
+        self, table_name: str, search_term: str, limit: int = 10
+    ) -> List[Dict[str, Any]]:
         """
         Search any table by text content in string columns
-
+
         Args:
             table_name: Name of the table to search
             search_term: Text to search for
             limit: Number of results to return
-
+
         Returns:
             List of dictionaries containing matching data
         """
         # First get column info to find text columns
         table_info = self.get_table_info(table_name)
-        text_columns = [col['column_name'] for col in table_info['columns']
-                        if 'char' in col['data_type'].lower() or 'text' in col['data_type'].lower()]
-
+        text_columns = [
+            col["column_name"]
+            for col in table_info["columns"]
+            if "char" in col["data_type"].lower() or "text" in col["data_type"].lower()
+        ]
+
         if not text_columns:
             logger.warning(f"No text columns found in table {table_name}")
             return []
-
+
         # Build dynamic search query
-        search_conditions = " OR ".join([f"{col} ILIKE :search_pattern" for col in text_columns])
+        search_conditions = " OR ".join(
+            [f"{col} ILIKE :search_pattern" for col in text_columns]
+        )
         query = f"""
             SELECT * FROM {table_name}
             WHERE {search_conditions}
             ORDER BY 1
             LIMIT :limit
         """
-
+
         search_pattern = f"%{search_term}%"
         return self.execute_query(query, {"search_pattern": search_pattern, "limit": limit})
 
-    def query_policies_abstracts_all(self, limit: int = 10, offset: int = 0) -> List[Dict[str, Any]]:
+    def query_policies_abstracts_all(
+        self, limit: int = 10, offset: int = 0
+    ) -> List[Dict[str, Any]]:
         """
         Query the policies_abstracts_all table
-
+
         Args:
             limit: Number of rows to return
             offset: Number of rows to skip
-
+
         Returns:
             List of dictionaries containing policy data
         """
@@ -174,13 +185,15 @@ def query_policies_abstracts_all(self, limit: int = 10, offset: int = 0) -> List
             ORDER BY openalex_id
             LIMIT :limit OFFSET :offset
         """
-
-        return self.execute_query(query, {"limit": limit, "offset": offset})
-
-    def save_extraction_results(self, openalex_id: str, extraction_data: Dict[str, Any], conclusion: str) -> bool:
+
+        return self.execute_query(query, {"limit": limit, "offset": offset})
+
+    def save_extraction_results(
+        self, openalex_id: str, extraction_data: Dict[str, Any], conclusion: str
+    ) -> bool:
         """
         Save extraction results to the database
-
+
         Args:
             openalex_id: OpenAlex ID of the policy
             extraction_data: Extracted data from AI system
@@ -195,17 +208,17 @@ def save_extraction_results(self, openalex_id: str, extraction_data: Dict[str, A
                 conclusion = EXCLUDED.conclusion,
                 updated_at = CURRENT_TIMESTAMP
         """
-
+
         # Execute the insert query
         result = self.execute_query(
-            insert_query,
+            insert_query,
             {
                 "openalex_id": openalex_id,
                 "conclusion": conclusion,
                 "extraction_data": extraction_data,
-            }
+            },
         )
-
+
     def create_policy_extractions_table(self):
         """
         Create the policy_extractions table
@@ -220,4 +233,4 @@ def create_policy_extractions_table(self):
                 updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
             )
         """
-        self.execute_query(query)
+        self.execute_query(query)
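The core pattern in `DatabaseClient.execute_query` (run a parameterized query, then zip `result.keys()` with each row to build a list of dicts) can be sketched without PostgreSQL or SQLModel, using an in-memory SQLite database; the table and column names below are illustrative, not the project's schema:

```python
import sqlite3
from typing import Any, Dict, List, Optional


def execute_query(
    conn: sqlite3.Connection, query: str, params: Optional[Dict] = None
) -> List[Dict[str, Any]]:
    """Run a parameterized query and return rows as dicts, like DatabaseClient.execute_query."""
    cursor = conn.execute(query, params or {})
    if cursor.description is None:  # statement returned no rows (DDL/DML)
        return []
    columns = [col[0] for col in cursor.description]
    return [dict(zip(columns, row)) for row in cursor.fetchall()]


conn = sqlite3.connect(":memory:")
execute_query(conn, "CREATE TABLE policies (openalex_id TEXT, conclusion TEXT)")
conn.execute(
    "INSERT INTO policies VALUES (:id, :c)", {"id": "W123", "c": "reduce demand"}
)
rows = execute_query(
    conn, "SELECT * FROM policies WHERE openalex_id = :id", {"id": "W123"}
)
print(rows)  # [{'openalex_id': 'W123', 'conclusion': 'reduce demand'}]
```

Passing values through `params` rather than f-strings is what protects `execute_query` from SQL injection; note that the committed code still interpolates `table_name` directly, which only works safely for trusted table names.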
