From 0b73b6a6d6b26606052c7812f0df34de0320ac63 Mon Sep 17 00:00:00 2001 From: Lina Date: Tue, 3 Feb 2026 16:52:47 +0100 Subject: [PATCH 1/4] First draft for cursor rules in the submodules --- .cursor/rules/business-objects.mdc | 522 ++++++++++++++++++++++++++ .cursor/rules/database-operations.mdc | 377 +++++++++++++++++++ .cursor/rules/entities.mdc | 315 ++++++++++++++++ .cursor/rules/enums.mdc | 270 +++++++++++++ .cursor/rules/guidelines.mdc | 64 ++++ 5 files changed, 1548 insertions(+) create mode 100644 .cursor/rules/business-objects.mdc create mode 100644 .cursor/rules/database-operations.mdc create mode 100644 .cursor/rules/entities.mdc create mode 100644 .cursor/rules/enums.mdc create mode 100644 .cursor/rules/guidelines.mdc diff --git a/.cursor/rules/business-objects.mdc b/.cursor/rules/business-objects.mdc new file mode 100644 index 0000000..43d164c --- /dev/null +++ b/.cursor/rules/business-objects.mdc @@ -0,0 +1,522 @@ +# Business Objects Guidelines + +Rules and conventions for business object modules in `business_objects/`, `cognition_objects/`, and `global_objects/`. + +## Module Organization + +### Directory Structure + +- **`business_objects/`**: Business domain entities (User, Project, Organization, etc.) +- **`cognition_objects/`**: Cognition domain entities (Conversation, Message, Strategy, etc.) +- **`global_objects/`**: Global/shared entities (AdminQueries, EtlTask, etc.) +- **`integration_objects/`**: Integration-specific entities (SharePoint, GitHub, PDF, etc.) 
with specialized CRUD patterns + +### File Naming + +- One file per entity +- File name matches entity name in `snake_case` +- Example: `user.py` for `User` entity, `conversation.py` for `Conversation` entity + +## Standard CRUD Functions + +Every business object module should provide these standard functions: + +### Get Functions + +#### Single Entity + +```python +def get(project_id: str, entity_id: str) -> Entity: + return ( + session.query(Entity) + .filter(Entity.project_id == project_id, Entity.id == entity_id) + .first() + ) +``` + +#### By ID Only + +```python +def get(entity_id: str) -> Entity: + return session.query(Entity).get(entity_id) +``` + +#### By ID Only (Alternative Pattern) + +Some entities use `get_by_id()` when only ID is needed (no project_id scope): + +```python +def get_by_id(entity_id: str) -> Entity: + return session.query(Entity).filter(Entity.id == entity_id).first() +``` + +#### Multiple Entities + +```python +def get_all(project_id: str) -> List[Entity]: + return session.query(Entity).filter(Entity.project_id == project_id).all() + +def get_by_id_list(entity_ids: List[str]) -> List[Entity]: + return session.query(Entity).filter(Entity.id.in_(entity_ids)).all() +``` + +#### Existence Check + +```python +def exists(project_id: str, entity_id: str) -> bool: + return ( + session.query(Entity) + .filter(Entity.project_id == project_id, Entity.id == entity_id) + .first() + is not None + ) +``` + +#### Scoped Queries + +For user-scoped queries (entities owned by a user): + +```python +def get_scoped(project_id: str, entity_id: str, user_id: str) -> Entity: + return ( + session.query(Entity) + .filter( + Entity.project_id == project_id, + Entity.id == entity_id, + Entity.created_by == user_id, + ) + .first() + ) +``` + +#### Count Queries + +```python +def get_count(project_id: str) -> int: + return ( + session.query(Entity) + .filter(Entity.project_id == project_id) + .count() + ) +``` + +#### Get By Name/Field + +```python +def 
get_by_name(name: str) -> Entity: + return session.query(Entity).filter(Entity.name == name).first() +``` + +### Create Function + +```python +def create( + project_id: str, + name: str, + created_by: str, + with_commit: bool = False, + created_at: Optional[datetime] = None, + # ... other parameters +) -> Entity: + entity = Entity( + project_id=project_id, + name=name, + created_by=created_by, + created_at=created_at, + # ... other fields + ) + + general.add(entity, with_commit) + return entity +``` + +**Rules:** +- Always accept `with_commit` parameter (default `False`) +- Accept optional `created_at` for testing/time travel scenarios +- Use `general.add()` to add to session +- Return the created entity + +### Update Function + +```python +def update( + project_id: str, + entity_id: str, + name: Optional[str] = None, + description: Optional[str] = None, + with_commit: bool = True, + # ... other optional parameters, e.g. some_json_field: Optional[Dict] = None +) -> Entity: + entity = get(project_id, entity_id) + if not entity: + raise ValueError(f"Entity {entity_id} not found") + + if name is not None: + entity.name = name + if description is not None: + entity.description = description + + # For JSON fields, use flag_modified + if some_json_field is not None: + entity.some_json_field = some_json_field + flag_modified(entity, 'some_json_field') + + general.flush_or_commit(with_commit) + return entity +``` + +**Rules:** +- All update parameters should be `Optional` +- Use `None` checks to only update provided fields +- Use `flag_modified()` for JSON fields +- Default `with_commit=True` for updates (different from create) +- Return the updated entity + +### Delete Function + +```python +def delete( + project_id: str, + entity_id: str, + with_commit: bool = False, +) -> None: + entity = get(project_id, entity_id) + if not entity: + return # Or raise exception + + general.delete(entity, with_commit) +``` + +**Rules:** +- Default `with_commit=False` for deletes +- Handle missing entities appropriately (return or raise) +-
Use `general.delete()` helper + +### Batch Delete Function + +For deleting multiple entities: + +```python +def delete_many(entity_ids: List[str], with_commit: bool = False) -> None: + entities = session.query(Entity).filter(Entity.id.in_(entity_ids)).all() + for entity in entities: + general.delete(entity, with_commit=False) + general.flush_or_commit(with_commit) +``` + +**Rules:** +- Use for bulk deletion operations +- Default `with_commit=False` to allow batching +- Consider performance implications for large batches + +## Transaction Management + +### `with_commit` Parameter + +- **`False`**: Flush to database but don't commit (default for create/delete) +- **`True`**: Commit transaction immediately (default for update) +- **`None`**: No flush or commit (rare, for batch operations) + +### Flush vs Commit + +- **Flush**: Writes to database but transaction can be rolled back +- **Commit**: Permanently saves changes +- Use `general.flush_or_commit(with_commit)` helper + +```python +from ..business_objects import general + +general.add(entity, with_commit=False) # Flush only +general.add(entity, with_commit=True) # Commit +``` + +## Query Patterns + +### Basic Query + +```python +from ..session import session +from ..models import Entity + +def get(project_id: str, entity_id: str) -> Entity: + return ( + session.query(Entity) + .filter(Entity.project_id == project_id) + .filter(Entity.id == entity_id) + .first() + ) +``` + +### Filtering + +```python +def get_active(project_id: str) -> List[Entity]: + return ( + session.query(Entity) + .filter(Entity.project_id == project_id) + .filter(Entity.is_active == True) + .all() + ) +``` + +### Ordering + +```python +def get_all_ordered(project_id: str) -> List[Entity]: + return ( + session.query(Entity) + .filter(Entity.project_id == project_id) + .order_by(Entity.created_at.desc()) + .all() + ) +``` + +### Joins + +```python +def get_with_relations(project_id: str) -> List[Entity]: + return ( + session.query(Entity) + 
.join(RelatedEntity) + .filter(Entity.project_id == project_id) + .all() + ) +``` + +### Subqueries + +Use subqueries for complex filtering: + +```python +def get_by_subquery(project_id: str, user_id: str) -> List[Entity]: + teams_subquery = ( + session.query(TeamMember.team_id) + .filter(TeamMember.user_id == user_id) + .subquery() + ) + return ( + session.query(Entity) + .join(TeamMember, TeamMember.entity_id == Entity.id) + .filter( + Entity.project_id == project_id, + TeamMember.team_id.in_(teams_subquery), + ) + .all() + ) +``` + +## SQL Injection Prevention + +### Always Use Parameterized Queries + +**❌ Bad:** +```python +query = f"SELECT * FROM table WHERE id = '{user_id}'" +``` + +**✅ Good:** +```python +entity = session.query(Entity).filter(Entity.id == user_id).first() +``` + +### Raw SQL Queries + +For raw SQL, use `prevent_sql_injection()`: + +```python +from ..util import prevent_sql_injection + +user_id = prevent_sql_injection(user_id, isinstance(user_id, str)) +query = f"SELECT * FROM users WHERE id = '{user_id}'" +``` + +**When to use raw SQL:** +- Complex aggregations (JSON aggregation, counts, etc.) 
+- Performance-critical queries that benefit from raw SQL +- Queries that are difficult to express with ORM + +**Pattern for complex raw SQL:** + +```python +def get_with_aggregations(project_id: str) -> Dict: + project_id = prevent_sql_injection(project_id, isinstance(project_id, str)) + + query = f""" + SELECT + json_agg(json_build_object( + 'id', entity.id, + 'name', entity.name + )) AS entities + FROM entity + WHERE project_id = '{project_id}'::UUID + """ + + result = general.execute_first(query) + return result[0] if result and result[0] else {} +``` + +**Rules:** +- Always use `prevent_sql_injection()` for user-provided values +- Use `general.execute_first()` for single-row results +- Use `general.execute_all()` for multiple-row results +- Prefer ORM queries when possible for type safety + +## JSON Field Updates + +When updating JSON fields, use `flag_modified()`: + +```python +from sqlalchemy.orm.attributes import flag_modified + +def update_metadata(project_id: str, entity_id: str, metadata: Dict): + entity = get(project_id, entity_id) + entity.metadata = metadata + flag_modified(entity, 'metadata') + general.flush_or_commit(True) +``` + +## Caching + +Use `TTLCacheDecorator` for frequently accessed, rarely changing data: + +```python +from ..db_cache import TTLCacheDecorator, CacheEnum + +@TTLCacheDecorator(CacheEnum.USER, 5, "user_id") +def get_user_cached(user_id: str) -> User: + user = get(user_id) + if not user: + return None + + # Expunge and make transient to avoid session issues + general.expunge(user) + general.make_transient(user) + return user +``` + +**Rules:** +- Use caching for read-heavy, rarely changing data +- Always expunge and make transient cached objects +- Specify appropriate TTL (time-to-live) in seconds + +## Imports + +### Standard Import Pattern + +```python +from datetime import datetime +from typing import List, Optional, Dict, Any + +from . 
import general # Always import general for CRUD operations +from ..session import session +from ..models import Entity, RelatedEntity +from ..enums import SomeEnum +from ..util import prevent_sql_injection +``` + +### Cross-Module Imports + +Import other business objects when needed: + +```python +from . import user, project, organization +``` + +## Error Handling + +### Entity Not Found + +```python +def get_or_raise(project_id: str, entity_id: str) -> Entity: + entity = get(project_id, entity_id) + if not entity: + raise ValueError(f"Entity {entity_id} not found in project {project_id}") + return entity +``` + +### Validation + +Validate inputs before database operations: + +```python +def create(project_id: str, name: str, **kwargs) -> Entity: + if not name or not name.strip(): + raise ValueError("Name cannot be empty") + + # ... create entity +``` + +## Helper Functions + +### Setting Values + +Use `set_values_on_item()` helper: + +```python +from .util import set_values_on_item + +def update(entity_id: str, **kwargs) -> Entity: + entity = get(entity_id) + return set_values_on_item(entity, **kwargs) +``` + +### Database Time + +Use `get_db_now()` for consistent timestamps: + +```python +from .util import get_db_now + +created_at = get_db_now() +``` + +## Integration Objects Pattern + +The `integration_objects/` directory contains specialized patterns for integration entities: + +### Manager Pattern + +Integration objects use a manager pattern with type-based model selection: + +```python +from ..integration_objects.manager import get, create, update, delete_many +from ..integration_objects.helper import get_supported_metadata_keys + +# Get integration model type based on integration +IntegrationModel = integration_model(integration_id=integration_id) + +# Use manager functions with model type +records = get(IntegrationModel, integration_id) +record = create(IntegrationModel, created_by=user_id, integration_id=integration_id, ...) 
+``` + +### Metadata Handling + +Integration objects have dynamic metadata fields: + +```python +from ..integration_objects.helper import get_supported_metadata + +# Filter metadata to only supported keys +supported_metadata = get_supported_metadata(table_name, raw_metadata) +``` + +**Rules:** +- Use `integration_objects/manager.py` for CRUD operations +- Use `integration_objects/helper.py` for metadata validation +- Metadata keys are defined per integration type in `helper.py` + +## Best Practices + +1. **Always use `general.add()`** instead of `session.add()` directly +2. **Default `with_commit=False`** for create/delete, `True` for update +3. **Return entities** from create/update functions +4. **Use type hints** for all function parameters and return types +5. **Handle None cases** appropriately (return None or raise exception) +6. **Use `flag_modified()`** for JSON field updates +7. **Prevent SQL injection** in all queries +8. **Cache appropriately** for read-heavy operations +9. **Group related queries** in the same module +10. **Keep functions focused** - one responsibility per function +11. **Use `exists()`** for existence checks instead of checking `get()` result +12. **Use `get_count()`** for counting entities instead of `len(get_all())` diff --git a/.cursor/rules/database-operations.mdc b/.cursor/rules/database-operations.mdc new file mode 100644 index 0000000..d970d7e --- /dev/null +++ b/.cursor/rules/database-operations.mdc @@ -0,0 +1,377 @@ +# Database Operations Guidelines + +Rules and conventions for database operations, session management, and transactions. + +## Session Management + +### Session Access + +Always use the session from `session.py`: + +```python +from ..session import session +``` + +**Never create your own session** - use the shared scoped session. 
+ +### Context Variables + +The session uses context variables for request tracking: + +```python +from ..session import request_id_ctx_var, get_request_id + +# Get current request ID +request_id = get_request_id() +``` + +### Session Lifecycle + +- Sessions are automatically scoped per request +- Use `general.get_ctx_token()` to create a new session context +- Use `general.reset_ctx_token()` to clean up session context + +## CRUD Operations + +### Adding Entities + +Use `general.add()` instead of `session.add()`: + +```python +from ..business_objects import general + +def create(...) -> Entity: + entity = Entity(...) + general.add(entity, with_commit=False) + return entity +``` + +### Adding Multiple Entities + +Use `general.add_all()` for batch operations: + +```python +entities = [Entity(...), Entity(...)] +general.add_all(entities, with_commit=False) +``` + +### Deleting Entities + +Use `general.delete()`: + +```python +def delete(entity_id: str, with_commit: bool = False): + entity = get(entity_id) + general.delete(entity, with_commit) +``` + +## Transaction Control + +### Flush vs Commit + +- **Flush**: Writes changes to database but transaction can be rolled back +- **Commit**: Permanently saves changes to database + +### `flush_or_commit()` Helper + +```python +from ..business_objects import general + +# Flush only (default) +general.flush_or_commit(False) + +# Commit transaction +general.flush_or_commit(True) + +# No operation +general.flush_or_commit(None) +``` + +### `with_commit` Parameter Pattern + +- **Create/Delete**: Default `with_commit=False` (flush only) +- **Update**: Default `with_commit=True` (commit immediately) + +```python +def create(..., with_commit: bool = False) -> Entity: + # ... + general.add(entity, with_commit) + +def update(..., with_commit: bool = True) -> Entity: + # ... 
+ general.flush_or_commit(with_commit) +``` + +## Commit and Rollback + +### Manual Commit + +```python +from ..business_objects import general + +general.commit() +``` + +### Rollback + +```python +from ..business_objects import general + +general.rollback() +``` + +**Use rollback** when errors occur to maintain data consistency. + +### Session Removal + +```python +from ..business_objects import general + +# Remove and refresh session +general.remove_and_refresh_session() + +# Remove session only +general.remove_session() +``` + +## Query Execution + +### ORM Queries + +Use SQLAlchemy ORM for type-safe queries: + +```python +from ..session import session +from ..models import User + +users = session.query(User).filter(User.role == "ENGINEER").all() +``` + +### Raw SQL Queries + +For raw SQL, use `general.execute()`: + +```python +from ..business_objects import general + +# Execute and get all results +results = general.execute_all("SELECT * FROM users") + +# Execute and get first result +result = general.execute_first("SELECT COUNT(*) FROM users") + +# Execute with parameters +query = "SELECT * FROM users WHERE id = :user_id" +result = general.execute(query, {"user_id": user_id}) +``` + +### SQL Injection Prevention + +**Always use parameterized queries** or `prevent_sql_injection()`: + +```python +from ..util import prevent_sql_injection + +# For raw SQL +user_id = prevent_sql_injection(user_id, isinstance(user_id, str)) +query = f"SELECT * FROM users WHERE id = '{user_id}'" + +# Prefer ORM queries (automatically safe) +user = session.query(User).filter(User.id == user_id).first() +``` + +## Object State Management + +### Expunge + +Remove object from session (useful for caching): + +```python +from ..business_objects import general + +general.expunge(entity) +``` + +### Make Transient + +Make object transient (detached from session): + +```python +from ..business_objects import general + +general.make_transient(entity) +``` + +**Common pattern for cached 
objects:** + +```python +@TTLCacheDecorator(CacheEnum.USER, 5, "user_id") +def get_user_cached(user_id: str) -> User: + user = get(user_id) + if not user: + return None + + general.expunge(user) + general.make_transient(user) + return user +``` + +### Refresh + +Refresh object from database: + +```python +from ..business_objects import general + +entity = general.refresh(entity) +``` + +## Error Handling + +### Session Rollback on Error + +Always rollback on exceptions: + +```python +from ..business_objects import general + +try: + # Database operations + general.add(entity, with_commit=True) +except Exception as e: + general.rollback() + raise +``` + +### Check Session State + +```python +from ..session import check_session_and_rollback + +try: + check_session_and_rollback() +except Exception: + # Handle session error + pass +``` + +## Connection Pooling + +### Configuration + +Connection pool settings are configured via environment variables: + +- `POSTGRES_POOL_SIZE`: Pool size (default: 20) +- `POSTGRES_POOL_MAX_OVERFLOW`: Max overflow (default: 10) +- `POSTGRES_POOL_RECYCLE`: Recycle time in seconds (default: 3600) +- `POSTGRES_POOL_USE_LIFO`: Use LIFO instead of FIFO (default: false) +- `POSTGRES_POOL_PRE_PING`: Test connections on checkout (default: true) + +### Best Practices + +- Don't hold connections longer than necessary +- Use appropriate pool size for your workload +- Enable `pool_pre_ping` to handle stale connections +- Set appropriate `pool_recycle` to refresh connections + +## Query Building Helpers + +### `construct_select_columns()` + +Build SELECT column lists dynamically: + +```python +from ..business_objects import general + +columns = general.construct_select_columns( + table="user", + prefix="u", + exclude_columns=["password"], + include_columns=["id", "email", "name"], +) +``` + +### `simple_selection_builder()` + +Build simple SELECT queries: + +```python +from ..business_objects import general + +query = general.simple_selection_builder( 
+ table="user", + exclude_columns=["password"], + where_condition="role = 'ENGINEER'", + order_by="created_at DESC", +) +``` + +## Debugging + +### Print ORM Query + +Print the SQL generated by ORM queries: + +```python +from ..business_objects import general + +query = session.query(User).filter(User.role == "ENGINEER") +general.print_orm_query(query) +``` + +### Get Dialect + +```python +from ..business_objects import general + +dialect = general.get_dialect() +``` + +## Best Practices + +1. **Always use `general.add()`** instead of `session.add()` directly +2. **Use `with_commit` parameter** to control transaction boundaries +3. **Default `with_commit=False`** for create/delete, `True` for update +4. **Rollback on errors** to maintain data consistency +5. **Use ORM queries** when possible (type-safe, prevents SQL injection) +6. **Prevent SQL injection** in all raw SQL queries +7. **Expunge and make transient** cached objects +8. **Don't hold sessions** longer than necessary +9. **Use batch operations** (`add_all`) when adding multiple entities +10. 
**Refresh objects** when you need latest data from database + +## Common Patterns + +### Transaction Pattern + +```python +def create_with_related(project_id: str, name: str, user_id: str): + try: + project = create(project_id, name, with_commit=False) + user_project = create_user_project(user_id, project.id, with_commit=False) + general.commit() + return project + except Exception: + general.rollback() + raise +``` + +### Batch Create Pattern + +```python +def create_batch(entities_data: List[Dict]) -> List[Entity]: + entities = [Entity(**data) for data in entities_data] + general.add_all(entities, with_commit=True) + return entities +``` + +### Safe Query Pattern + +```python +def get_safe(project_id: str, entity_id: str) -> Optional[Entity]: + try: + return get(project_id, entity_id) + except Exception: + general.rollback() + return None +``` diff --git a/.cursor/rules/entities.mdc b/.cursor/rules/entities.mdc new file mode 100644 index 0000000..2b13f92 --- /dev/null +++ b/.cursor/rules/entities.mdc @@ -0,0 +1,315 @@ +# Entity Model Guidelines + +Rules and conventions for defining SQLAlchemy entity models in `models.py`. + +## Table Definition + +### Base Class + +All entity models must inherit from `Base`: + +```python +from sqlalchemy.ext.declarative import declarative_base +Base = declarative_base() + +class MyEntity(Base): + __tablename__ = Tablenames.MY_ENTITY.value + # ... +``` + +### Table Name + +- Use the `Tablenames` enum for table names +- Table names must be in `snake_case` +- Always use `__tablename__ = Tablenames.ENTITY_NAME.value` + +```python +class User(Base): + __tablename__ = Tablenames.USER.value +``` + +## Column Ordering Convention + +Follow this order (guideline, not strict rule - context-based columns should be grouped together): + +1. **`__tablename__` & `__table_args__`** (if needed) +2. **Primary key(s)** - typically `id` +3. 
**Foreign keys** - sorted from broader scope to more specific + - Example: `org_id` → `project_id` → `user_id` +4. **Other columns** - sorted from broader scope to more specific + - Example: `name` → `description` → `some_flag` +5. **Columns added after initial creation** - append at the end + +### Example + +```python +class Project(Base): + __tablename__ = Tablenames.PROJECT.value + + # 2. Primary key + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + + # 3. Foreign keys (broader to specific) + organization_id = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.ORGANIZATION.value}.id", ondelete="CASCADE"), + index=True, + ) + created_by = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), + index=True, + ) + + # 4. Other columns (broader to specific) + name = Column(String) + description = Column(String) + is_active = Column(Boolean, default=True) + + # 4. (continued) Timestamps + created_at = Column(DateTime, default=sql.func.now()) +``` + +## Primary Keys + +### UUID Primary Keys + +- Use `UUID(as_uuid=True)` for primary keys +- Default to `uuid.uuid4` for new entities +- Exception: Composite keys or string-based keys when appropriate + +```python +id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) +``` + +### Composite Primary Keys + +When needed, define multiple columns as primary keys: + +```python +__table_args__ = ( + PrimaryKeyConstraint('col1', 'col2'), +) +``` + +## Foreign Keys + +### Definition + +- Always specify `ForeignKey` with table reference +- Use `ondelete` cascade behavior explicitly +- Add `index=True` for foreign keys (performance) + +```python +project_id = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.PROJECT.value}.id", ondelete="CASCADE"), + index=True, +) +``` + +### Cascade Behaviors + +- **`CASCADE`**: Delete child when parent is deleted +- **`SET NULL`**: Set foreign key to NULL when parent is deleted (requires nullable=True) +-
**`RESTRICT`**: Prevent deletion if children exist + +Common patterns: +- User-created entities: `ondelete="SET NULL"` (preserve data if user deleted) +- Ownership relationships: `ondelete="CASCADE"` (delete children with parent) + +### Naming + +- Foreign key columns: `{entity}_id` (snake_case) +- Example: `organization_id`, `project_id`, `user_id` + +## Relationships + +### Using `parent_to_child_relationship()` + +Use the helper function for 1:N relationships: + +```python +from ..models import parent_to_child_relationship + +projects = parent_to_child_relationship( + Tablenames.ORGANIZATION, + Tablenames.PROJECT, +) +``` + +### Cascade Behavior + +Specify cascade behavior: + +```python +# Keep parent when child is deleted (default) +projects = parent_to_child_relationship( + Tablenames.ORGANIZATION, + Tablenames.PROJECT, + cascase_behaviour=CascadeBehaviour.KEEP_PARENT_ON_CHILD_DELETION, +) + +# Delete both if either is deleted +projects = parent_to_child_relationship( + Tablenames.ORGANIZATION, + Tablenames.PROJECT, + cascase_behaviour=CascadeBehaviour.DELETE_BOTH_IF_EITHER_IS_DELETED, +) +``` + +### Ordering + +Specify `order_by` for relationships: + +```python +projects = parent_to_child_relationship( + Tablenames.USER, + Tablenames.PROJECT, + order_by="created_at.desc()", +) +``` + +## Column Types + +### Common Types + +- **UUID**: `UUID(as_uuid=True)` +- **String**: `String` (no length unless required) +- **Integer**: `Integer` +- **BigInteger**: `BigInteger` (for large numbers) +- **Boolean**: `Boolean` (with `default=False` or `default=True`) +- **DateTime**: `DateTime` (with `default=sql.func.now()` for timestamps) +- **JSON**: `JSON` (for structured data) +- **LargeBinary**: `LargeBinary` (for binary data) +- **ARRAY**: `ARRAY(String)` or `ARRAY(Integer)` (PostgreSQL arrays) + +### Timestamps + +Always include `created_at` for entities: + +```python +created_at = Column(DateTime, default=sql.func.now()) +``` + +Add `updated_at` when modification 
tracking is needed: + +```python +updated_at = Column(DateTime, onupdate=sql.func.now()) +``` + +## Defaults + +### Boolean Defaults + +Always specify explicit defaults: + +```python +is_active = Column(Boolean, default=True) +is_deleted = Column(Boolean, default=False) +``` + +### JSON Defaults + +Use dictionaries for JSON defaults: + +```python +config = Column(JSON, default={}) +meta_data = Column(JSON, default={}) # "metadata" is a reserved attribute name on declarative models +``` + +### Enum Defaults + +Use enum values: + +```python +from ..enums import UserRoles + +role = Column(String, default=UserRoles.ENGINEER.value) +``` + +## Indexes + +### Foreign Keys + +Always index foreign keys: + +```python +project_id = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.PROJECT.value}.id", ondelete="CASCADE"), + index=True, # Required +) +``` + +### Composite Indexes + +Use `Index` for composite indexes: + +```python +from sqlalchemy import Index + +__table_args__ = ( + Index('idx_project_user', 'project_id', 'user_id'), +) +``` + +### Unique Constraints + +Use `UniqueConstraint` for unique combinations: + +```python +from sqlalchemy import UniqueConstraint + +__table_args__ = ( + UniqueConstraint('project_id', 'name', name='uq_project_name'), +) +``` + +## Comments + +Add comments for: +- Non-obvious foreign keys (e.g., multi-field foreign keys) +- Enum types +- Complex business logic + +```python +# no foreign key since it's a multi-field reference +xfkey = Column(UUID(as_uuid=True), index=True) +# of type CommentCategory e.g.
USER +xftype = Column(String, index=True) +``` + +## Class Naming + +- Use **PascalCase** for class names +- Match the table name concept (singular form) +- Examples: `User`, `Project`, `Organization`, `CommentData` + +## Import Organization + +Group imports logically: + +```python +# Standard library +import uuid + +# Third-party +from sqlalchemy import Column, String, DateTime, ForeignKey +from sqlalchemy.dialects.postgresql import UUID + +# Local imports +from .enums import Tablenames, UserRoles +from .models import Base +``` + +## Best Practices + +1. **Always use enums** for table names and enum values +2. **Index foreign keys** for query performance +3. **Specify cascade behaviors** explicitly +4. **Use UUIDs** for primary keys unless there's a specific reason not to +5. **Add timestamps** (`created_at`) to track entity creation +6. **Document complex relationships** with comments +7. **Follow column ordering** convention for maintainability +8. **Use appropriate column types** (don't use String for everything) diff --git a/.cursor/rules/enums.mdc b/.cursor/rules/enums.mdc new file mode 100644 index 0000000..272eec1 --- /dev/null +++ b/.cursor/rules/enums.mdc @@ -0,0 +1,270 @@ +# Enum Guidelines + +Rules and conventions for enum definitions in `enums.py`. 
+ +## Base Enum Class + +All enums should inherit from `EnumKern` which provides helper methods: + +```python +from enum import Enum + +class EnumKern(Enum): + @classmethod + def all(cls): + return [e.value for e in cls] + + @classmethod + def from_string(cls, value: str): + changed_value = value.upper().replace(" ", "_").replace("-", "_") + for member in cls: + if member.value == changed_value: + return member + raise ValueError(f"Unknown enum {cls.__name__}: {value}") +``` + +## Enum Definition + +### Basic Structure + +```python +class MyEnum(EnumKern): + VALUE_ONE = "VALUE_ONE" + VALUE_TWO = "VALUE_TWO" + VALUE_THREE = "VALUE_THREE" +``` + +### Naming Conventions + +- **Enum class names**: PascalCase (e.g., `UserRoles`, `ProjectStatus`) +- **Enum member names**: UPPER_SNAKE_CASE (e.g., `ENGINEER`, `INIT_COMPLETE`) +- **Enum values**: Match member names or use descriptive strings + +```python +class UserRoles(EnumKern): + ENGINEER = "ENGINEER" + EXPERT = "EXPERT" + ANNOTATOR = "ANNOTATOR" +``` + +## Common Enums + +### Tablenames Enum + +**Critical**: All table names must be defined in `Tablenames` enum: + +```python +class Tablenames(EnumKern): + USER = "user" + PROJECT = "project" + ORGANIZATION = "organization" + # ... 
+``` + +**Rules:** +- Values are `snake_case` (matching database table names) +- Must match `__tablename__` in entity models exactly +- Used throughout codebase for table references + +### Status/State Enums + +```python +class ProjectStatus(EnumKern): + INIT_UPLOAD = "INIT_UPLOAD" + INIT_COMPLETE = "INIT_COMPLETE" + IN_DELETION = "IN_DELETION" + HIDDEN = "HIDDEN" +``` + +### Role Enums + +```python +class UserRoles(EnumKern): + ENGINEER = "ENGINEER" + EXPERT = "EXPERT" + ANNOTATOR = "ANNOTATOR" +``` + +### Type Enums + +```python +class DataTypes(EnumKern): + INTEGER = "INTEGER" + FLOAT = "FLOAT" + BOOLEAN = "BOOLEAN" + TEXT = "TEXT" + CATEGORY = "CATEGORY" +``` + +## Using Enums + +### In Entity Models + +```python +from ..enums import UserRoles + +class User(Base): + role = Column(String, default=UserRoles.ENGINEER.value) +``` + +**Always use `.value`** when storing in database columns. + +### In Business Objects + +```python +from ..enums import UserRoles + +def get_engineers(org_id: str) -> List[User]: + return ( + session.query(User) + .filter(User.role == UserRoles.ENGINEER.value) + .all() + ) +``` + +### Enum Comparison + +```python +# Compare enum values +if user.role == UserRoles.ENGINEER.value: + # ... + +# Check if value is in enum +if value in UserRoles.all(): + # ... +``` + +## Helper Methods + +### `all()` Method + +Get all enum values as a list: + +```python +roles = UserRoles.all() # ["ENGINEER", "EXPERT", "ANNOTATOR"] +``` + +### `from_string()` Method + +Convert string to enum member (case-insensitive, handles spaces/dashes): + +```python +role = UserRoles.from_string("engineer") # Returns UserRoles.ENGINEER +role = UserRoles.from_string("expert") # Returns UserRoles.EXPERT +``` + +**Note**: This method normalizes input (uppercase, replaces spaces/dashes with underscores). 
+ +## Enum Parsing + +### `try_parse_enum_value()` Helper + +Use for safe enum parsing: + +```python +from ..enums import try_parse_enum_value, Tablenames + +table_enum = try_parse_enum_value(table_name, Tablenames) +``` + +This safely converts a string to an enum member, handling errors gracefully. + +## Enum Values in Database + +### Storage + +- Enums are stored as **strings** in the database (`String` column type) +- Always store `.value` property, not the enum member itself +- Database columns use `String` type, not PostgreSQL ENUM type + +### Defaults + +```python +class User(Base): + role = Column(String, default=UserRoles.ENGINEER.value) + status = Column(String, default=ProjectStatus.INIT_UPLOAD.value) +``` + +## Enum Organization + +### Group Related Enums + +Keep related enums together: + +```python +# User-related enums +class UserRoles(EnumKern): + # ... + +class UserStatus(EnumKern): + # ... + +# Project-related enums +class ProjectStatus(EnumKern): + # ... + +class ProjectType(EnumKern): + # ... +``` + +### Import Organization + +When importing enums: + +```python +from .enums import ( + UserRoles, + ProjectStatus, + Tablenames, + DataTypes, +) +``` + +## Best Practices + +1. **Always inherit from `EnumKern`** for helper methods +2. **Use descriptive names** that clearly indicate purpose +3. **Keep values consistent** - prefer matching member names +4. **Add to `Tablenames` enum** when creating new tables +5. **Use `.value`** when storing in database columns +6. **Use enum comparisons** instead of string comparisons +7. **Document complex enums** with comments +8. **Group related enums** together in the file +9. **Use `try_parse_enum_value()`** for safe parsing +10. 
**Keep enum values stable** - don't change existing values (add new ones instead) + +## Common Patterns + +### Enum with Methods + +```python +class UserRoles(EnumKern): + ENGINEER = "ENGINEER" + EXPERT = "EXPERT" + ANNOTATOR = "ANNOTATOR" + + def is_admin(self) -> bool: + return self in [UserRoles.ENGINEER, UserRoles.EXPERT] +``` + +### Enum Validation + +```python +def validate_role(role: str) -> UserRoles: + try: + return UserRoles.from_string(role) + except ValueError: + raise ValueError(f"Invalid role: {role}") +``` + +### Enum in Type Hints + +```python +from typing import Literal + +UserRoleType = Literal[ + UserRoles.ENGINEER.value, + UserRoles.EXPERT.value, + UserRoles.ANNOTATOR.value, +] +``` diff --git a/.cursor/rules/guidelines.mdc b/.cursor/rules/guidelines.mdc new file mode 100644 index 0000000..4abe82c --- /dev/null +++ b/.cursor/rules/guidelines.mdc @@ -0,0 +1,64 @@ +# Model Guidelines + +This directory contains guidelines and conventions for the `submodules/model` module. These guidelines ensure consistency, maintainability, and correctness across all database models, business objects, and related code. + +## Overview + +The `submodules/model` module follows a layered architecture: + +- **Entity Models** (`models.py`): SQLAlchemy ORM models representing database tables +- **Business Objects** (`business_objects/`, `cognition_objects/`, `global_objects/`): Business logic and CRUD operations for entities +- **Integration Objects** (`integration_objects/`): Specialized logic for integration entities (SharePoint, GitHub, PDF, etc.) 
+- **Enums** (`enums.py`): Type-safe enumeration definitions +- **Utilities** (`util.py`, `session.py`): Helper functions and database session management + +## Guideline Files + +- **[entities.mdc](./entities.mdc)** - Rules for defining SQLAlchemy entity models +- **[business-objects.mdc](./business-objects.mdc)** - Rules for business object modules and CRUD operations +- **[enums.mdc](./enums.mdc)** - Rules for enum definitions and usage +- **[database-operations.mdc](./database-operations.mdc)** - Rules for database operations, session management, and transactions + +## General Principles + +1. **Separation of Concerns**: Entity models define structure, business objects contain logic +2. **Type Safety**: Use enums and type hints consistently +3. **Consistency**: Follow established naming conventions and patterns +4. **Security**: Always use parameterized queries and prevent SQL injection +5. **Performance**: Use appropriate indexes, caching, and query optimization + +## Quick Reference + +### Directory Structure + +``` +submodules/model/ +├── models.py # All SQLAlchemy entity models +├── enums.py # Enum definitions +├── session.py # Database session management +├── util.py # Utility functions +├── business_objects/ # Business domain logic +│ ├── user.py +│ ├── project.py +│ └── ... +├── cognition_objects/ # Cognition domain logic +│ ├── conversation.py +│ ├── message.py +│ └── ... +├── global_objects/ # Global/shared logic +│ └── ... 
+└── integration_objects/ # Integration-specific logic + ├── helper.py + └── manager.py +``` + +### Common Patterns + +- Entity models inherit from `Base` (SQLAlchemy declarative_base) +- Business objects provide `get()`, `create()`, `update()`, `delete()` functions +- Use `with_commit` parameter to control transaction boundaries +- Foreign keys use `ondelete` cascades appropriately +- UUIDs are used for primary keys +- Timestamps use `created_at` with `default=sql.func.now()` + +For detailed rules, see the individual guideline files referenced above. From 0962ec0c363ff45501ac1b9f17a4f3506da9708b Mon Sep 17 00:00:00 2001 From: Lina Date: Tue, 3 Feb 2026 16:59:31 +0100 Subject: [PATCH 2/4] Always apply logic for submodules model --- .cursor/rules/business-objects.mdc | 149 +------------------------- .cursor/rules/database-operations.mdc | 4 + .cursor/rules/entities.mdc | 4 + .cursor/rules/enums.mdc | 4 + .cursor/rules/guidelines.mdc | 4 + 5 files changed, 19 insertions(+), 146 deletions(-) diff --git a/.cursor/rules/business-objects.mdc b/.cursor/rules/business-objects.mdc index 43d164c..76c9009 100644 --- a/.cursor/rules/business-objects.mdc +++ b/.cursor/rules/business-objects.mdc @@ -1,149 +1,6 @@ -# Business Objects Guidelines - -Rules and conventions for business object modules in `business_objects/`, `cognition_objects/`, and `global_objects/`. - -## Module Organization - -### Directory Structure - -- **`business_objects/`**: Business domain entities (User, Project, Organization, etc.) -- **`cognition_objects/`**: Cognition domain entities (Conversation, Message, Strategy, etc.) -- **`global_objects/`**: Global/shared entities (AdminQueries, EtlTask, etc.) -- **`integration_objects/`**: Integration-specific entities (SharePoint, GitHub, PDF, etc.) 
with specialized CRUD patterns - -### File Naming - -- One file per entity -- File name matches entity name in `snake_case` -- Example: `user.py` for `User` entity, `conversation.py` for `Conversation` entity - -## Standard CRUD Functions - -Every business object module should provide these standard functions: - -### Get Functions - -#### Single Entity - -```python -def get(project_id: str, entity_id: str) -> Entity: - return ( - session.query(Entity) - .filter(Entity.project_id == project_id, Entity.id == entity_id) - .first() - ) -``` - -#### By ID Only - -```python -def get(entity_id: str) -> Entity: - return session.query(Entity).get(entity_id) -``` - -#### By ID Only (Alternative Pattern) - -Some entities use `get_by_id()` when only ID is needed (no project_id scope): - -```python -def get_by_id(entity_id: str) -> Entity: - return session.query(Entity).filter(Entity.id == entity_id).first() -``` - -#### Multiple Entities - -```python -def get_all(project_id: str) -> List[Entity]: - return session.query(Entity).filter(Entity.project_id == project_id).all() - -def get_by_id_list(entity_ids: List[str]) -> List[Entity]: - return session.query(Entity).filter(Entity.id.in_(entity_ids)).all() -``` - -#### Existence Check - -```python -def exists(project_id: str, entity_id: str) -> bool: - return ( - session.query(Entity) - .filter(Entity.project_id == project_id, Entity.id == entity_id) - .first() - is not None - ) -``` - -#### Scoped Queries - -For user-scoped queries (entities owned by a user): - -```python -def get_scoped(project_id: str, entity_id: str, user_id: str) -> Entity: - return ( - session.query(Entity) - .filter( - Entity.project_id == project_id, - Entity.id == entity_id, - Entity.created_by == user_id, - ) - .first() - ) -``` - -#### Count Queries - -```python -def get_count(project_id: str) -> int: - return ( - session.query(Entity) - .filter(Entity.project_id == project_id) - .count() - ) -``` - -#### Get By Name/Field - -```python -def 
get_by_name(name: str) -> Entity: - return session.query(Entity).filter(Entity.name == name).first() -``` - -### Create Function - -```python -def create( - project_id: str, - name: str, - created_by: str, - with_commit: bool = False, - created_at: Optional[datetime] = None, - # ... other parameters -) -> Entity: - entity = Entity( - project_id=project_id, - name=name, - created_by=created_by, - created_at=created_at, - # ... other fields - ) - - general.add(entity, with_commit) - return entity -``` - -**Rules:** -- Always accept `with_commit` parameter (default `False`) -- Accept optional `created_at` for testing/time travel scenarios -- Use `general.add()` to add to session -- Return the created entity - -### Update Function - -```python -def update( - project_id: str, - entity_id: str, - name: Optional[str] = None, - description: Optional[str] = None, +--- +alwaysApply: true +--- with_commit: bool = True, # ... other optional parameters ) -> Entity: diff --git a/.cursor/rules/database-operations.mdc b/.cursor/rules/database-operations.mdc index d970d7e..9b5e690 100644 --- a/.cursor/rules/database-operations.mdc +++ b/.cursor/rules/database-operations.mdc @@ -1,3 +1,7 @@ +--- +alwaysApply: true +--- + # Database Operations Guidelines Rules and conventions for database operations, session management, and transactions. diff --git a/.cursor/rules/entities.mdc b/.cursor/rules/entities.mdc index 2b13f92..a0bc3a7 100644 --- a/.cursor/rules/entities.mdc +++ b/.cursor/rules/entities.mdc @@ -1,3 +1,7 @@ +--- +alwaysApply: true +--- + # Entity Model Guidelines Rules and conventions for defining SQLAlchemy entity models in `models.py`. diff --git a/.cursor/rules/enums.mdc b/.cursor/rules/enums.mdc index 272eec1..e6f3e6a 100644 --- a/.cursor/rules/enums.mdc +++ b/.cursor/rules/enums.mdc @@ -1,3 +1,7 @@ +--- +alwaysApply: true +--- + # Enum Guidelines Rules and conventions for enum definitions in `enums.py`. 
diff --git a/.cursor/rules/guidelines.mdc b/.cursor/rules/guidelines.mdc index 4abe82c..4f3add0 100644 --- a/.cursor/rules/guidelines.mdc +++ b/.cursor/rules/guidelines.mdc @@ -1,3 +1,7 @@ +--- +alwaysApply: true +--- + # Model Guidelines This directory contains guidelines and conventions for the `submodules/model` module. These guidelines ensure consistency, maintainability, and correctness across all database models, business objects, and related code. From d0c73b8b22783eebe2caa7254dba507e15bbdab5 Mon Sep 17 00:00:00 2001 From: Lina Date: Tue, 3 Feb 2026 17:00:55 +0100 Subject: [PATCH 3/4] Improvements cursor files --- .cursor/rules/business-objects.mdc | 147 +++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/.cursor/rules/business-objects.mdc b/.cursor/rules/business-objects.mdc index 76c9009..b7d0c92 100644 --- a/.cursor/rules/business-objects.mdc +++ b/.cursor/rules/business-objects.mdc @@ -1,6 +1,153 @@ --- alwaysApply: true --- + +# Business Objects Guidelines + +Rules and conventions for business object modules in `business_objects/`, `cognition_objects/`, and `global_objects/`. + +## Module Organization + +### Directory Structure + +- **`business_objects/`**: Business domain entities (User, Project, Organization, etc.) +- **`cognition_objects/`**: Cognition domain entities (Conversation, Message, Strategy, etc.) +- **`global_objects/`**: Global/shared entities (AdminQueries, EtlTask, etc.) +- **`integration_objects/`**: Integration-specific entities (SharePoint, GitHub, PDF, etc.) 
with specialized CRUD patterns + +### File Naming + +- One file per entity +- File name matches entity name in `snake_case` +- Example: `user.py` for `User` entity, `conversation.py` for `Conversation` entity + +## Standard CRUD Functions + +Every business object module should provide these standard functions: + +### Get Functions + +#### Single Entity + +```python +def get(project_id: str, entity_id: str) -> Entity: + return ( + session.query(Entity) + .filter(Entity.project_id == project_id, Entity.id == entity_id) + .first() + ) +``` + +#### By ID Only + +```python +def get(entity_id: str) -> Entity: + return session.query(Entity).get(entity_id) +``` + +#### By ID Only (Alternative Pattern) + +Some entities use `get_by_id()` when only ID is needed (no project_id scope): + +```python +def get_by_id(entity_id: str) -> Entity: + return session.query(Entity).filter(Entity.id == entity_id).first() +``` + +#### Multiple Entities + +```python +def get_all(project_id: str) -> List[Entity]: + return session.query(Entity).filter(Entity.project_id == project_id).all() + +def get_by_id_list(entity_ids: List[str]) -> List[Entity]: + return session.query(Entity).filter(Entity.id.in_(entity_ids)).all() +``` + +#### Existence Check + +```python +def exists(project_id: str, entity_id: str) -> bool: + return ( + session.query(Entity) + .filter(Entity.project_id == project_id, Entity.id == entity_id) + .first() + is not None + ) +``` + +#### Scoped Queries + +For user-scoped queries (entities owned by a user): + +```python +def get_scoped(project_id: str, entity_id: str, user_id: str) -> Entity: + return ( + session.query(Entity) + .filter( + Entity.project_id == project_id, + Entity.id == entity_id, + Entity.created_by == user_id, + ) + .first() + ) +``` + +#### Count Queries + +```python +def get_count(project_id: str) -> int: + return ( + session.query(Entity) + .filter(Entity.project_id == project_id) + .count() + ) +``` + +#### Get By Name/Field + +```python +def 
get_by_name(name: str) -> Entity: + return session.query(Entity).filter(Entity.name == name).first() +``` + +### Create Function + +```python +def create( + project_id: str, + name: str, + created_by: str, + with_commit: bool = False, + created_at: Optional[datetime] = None, + # ... other parameters +) -> Entity: + entity = Entity( + project_id=project_id, + name=name, + created_by=created_by, + created_at=created_at, + # ... other fields + ) + + general.add(entity, with_commit) + return entity +``` + +**Rules:** +- Always accept `with_commit` parameter (default `False`) +- Accept optional `created_at` for testing/time travel scenarios +- Use `general.add()` to add to session +- Return the created entity + +### Update Function + +```python +def update( + project_id: str, + entity_id: str, + name: Optional[str] = None, + description: Optional[str] = None, with_commit: bool = True, # ... other optional parameters ) -> Entity: From 158ea510eba6f32c3a2e1535567bee8b807339f3 Mon Sep 17 00:00:00 2001 From: Lina Date: Tue, 3 Feb 2026 17:29:33 +0100 Subject: [PATCH 4/4] Submodules model improvements --- .cursor/rules/business-objects.mdc | 88 ++------------------------- .cursor/rules/database-operations.mdc | 62 ++----------------- .cursor/rules/entities.mdc | 29 ++------- .cursor/rules/enums.mdc | 48 ++------------- 4 files changed, 23 insertions(+), 204 deletions(-) diff --git a/.cursor/rules/business-objects.mdc b/.cursor/rules/business-objects.mdc index b7d0c92..9dc5011 100644 --- a/.cursor/rules/business-objects.mdc +++ b/.cursor/rules/business-objects.mdc @@ -76,23 +76,6 @@ def exists(project_id: str, entity_id: str) -> bool: ) ``` -#### Scoped Queries - -For user-scoped queries (entities owned by a user): - -```python -def get_scoped(project_id: str, entity_id: str, user_id: str) -> Entity: - return ( - session.query(Entity) - .filter( - Entity.project_id == project_id, - Entity.id == entity_id, - Entity.created_by == user_id, - ) - .first() - ) -``` - #### Count 
Queries ```python @@ -287,27 +270,6 @@ def get_with_relations(project_id: str) -> List[Entity]: ) ``` -### Subqueries - -Use subqueries for complex filtering: - -```python -def get_by_subquery(project_id: str, user_id: str) -> List[Entity]: - teams_subquery = ( - session.query(TeamMember.team_id) - .filter(TeamMember.user_id == user_id) - .subquery() - ) - return ( - session.query(Entity) - .join(TeamMember, TeamMember.entity_id == Entity.id) - .filter( - Entity.project_id == project_id, - TeamMember.team_id.in_(teams_subquery), - ) - .all() - ) -``` ## SQL Injection Prevention @@ -450,41 +412,14 @@ def create(project_id: str, name: str, **kwargs) -> Entity: # ... create entity ``` -## Helper Functions - -### Setting Values - -Use `set_values_on_item()` helper: - -```python -from .util import set_values_on_item - -def update(entity_id: str, **kwargs) -> Entity: - entity = get(entity_id) - return set_values_on_item(entity, **kwargs) -``` - -### Database Time - -Use `get_db_now()` for consistent timestamps: - -```python -from .util import get_db_now - -created_at = get_db_now() -``` ## Integration Objects Pattern -The `integration_objects/` directory contains specialized patterns for integration entities: - -### Manager Pattern - -Integration objects use a manager pattern with type-based model selection: +The `integration_objects/` directory contains specialized patterns for integration entities using a manager pattern with type-based model selection: ```python from ..integration_objects.manager import get, create, update, delete_many -from ..integration_objects.helper import get_supported_metadata_keys +from ..integration_objects.helper import get_supported_metadata # Get integration model type based on integration IntegrationModel = integration_model(integration_id=integration_id) @@ -492,14 +427,6 @@ IntegrationModel = integration_model(integration_id=integration_id) # Use manager functions with model type records = get(IntegrationModel, integration_id) record = 
create(IntegrationModel, created_by=user_id, integration_id=integration_id, ...) -``` - -### Metadata Handling - -Integration objects have dynamic metadata fields: - -```python -from ..integration_objects.helper import get_supported_metadata # Filter metadata to only supported keys supported_metadata = get_supported_metadata(table_name, raw_metadata) @@ -508,7 +435,6 @@ supported_metadata = get_supported_metadata(table_name, raw_metadata) **Rules:** - Use `integration_objects/manager.py` for CRUD operations - Use `integration_objects/helper.py` for metadata validation -- Metadata keys are defined per integration type in `helper.py` ## Best Practices @@ -518,9 +444,7 @@ supported_metadata = get_supported_metadata(table_name, raw_metadata) 4. **Use type hints** for all function parameters and return types 5. **Handle None cases** appropriately (return None or raise exception) 6. **Use `flag_modified()`** for JSON field updates -7. **Prevent SQL injection** in all queries -8. **Cache appropriately** for read-heavy operations -9. **Group related queries** in the same module -10. **Keep functions focused** - one responsibility per function -11. **Use `exists()`** for existence checks instead of checking `get()` result -12. **Use `get_count()`** for counting entities instead of `len(get_all())` +7. **Prevent SQL injection** in all queries using parameterized queries or `prevent_sql_injection()` +8. **Cache appropriately** for read-heavy operations using `TTLCacheDecorator` +9. **Use `exists()`** for existence checks instead of checking `get()` result +10. 
**Use `get_count()`** for counting entities instead of `len(get_all())` diff --git a/.cursor/rules/database-operations.mdc b/.cursor/rules/database-operations.mdc index 9b5e690..6f75e24 100644 --- a/.cursor/rules/database-operations.mdc +++ b/.cursor/rules/database-operations.mdc @@ -295,54 +295,15 @@ columns = general.construct_select_columns( ) ``` -### `simple_selection_builder()` - -Build simple SELECT queries: - -```python -from ..business_objects import general - -query = general.simple_selection_builder( - table="user", - exclude_columns=["password"], - where_condition="role = 'ENGINEER'", - order_by="created_at DESC", -) -``` - -## Debugging - -### Print ORM Query - -Print the SQL generated by ORM queries: - -```python -from ..business_objects import general - -query = session.query(User).filter(User.role == "ENGINEER") -general.print_orm_query(query) -``` - -### Get Dialect - -```python -from ..business_objects import general - -dialect = general.get_dialect() -``` - ## Best Practices 1. **Always use `general.add()`** instead of `session.add()` directly -2. **Use `with_commit` parameter** to control transaction boundaries -3. **Default `with_commit=False`** for create/delete, `True` for update -4. **Rollback on errors** to maintain data consistency -5. **Use ORM queries** when possible (type-safe, prevents SQL injection) -6. **Prevent SQL injection** in all raw SQL queries -7. **Expunge and make transient** cached objects -8. **Don't hold sessions** longer than necessary -9. **Use batch operations** (`add_all`) when adding multiple entities -10. **Refresh objects** when you need latest data from database +2. **Use `with_commit` parameter** to control transaction boundaries (default `False` for create/delete, `True` for update) +3. **Rollback on errors** to maintain data consistency +4. **Use ORM queries** when possible (type-safe, prevents SQL injection) +5. **Prevent SQL injection** in all raw SQL queries using `prevent_sql_injection()` +6. 
**Expunge and make transient** cached objects +7. **Use batch operations** (`add_all`) when adding multiple entities ## Common Patterns @@ -368,14 +329,3 @@ def create_batch(entities_data: List[Dict]) -> List[Entity]: general.add_all(entities, with_commit=True) return entities ``` - -### Safe Query Pattern - -```python -def get_safe(project_id: str, entity_id: str) -> Optional[Entity]: - try: - return get(project_id, entity_id) - except Exception: - general.rollback() - return None -``` diff --git a/.cursor/rules/entities.mdc b/.cursor/rules/entities.mdc index a0bc3a7..68f18fe 100644 --- a/.cursor/rules/entities.mdc +++ b/.cursor/rules/entities.mdc @@ -136,41 +136,24 @@ Use the helper function for 1:N relationships: ```python from ..models import parent_to_child_relationship +# Basic relationship projects = parent_to_child_relationship( Tablenames.ORGANIZATION, Tablenames.PROJECT, ) -``` - -### Cascade Behavior - -Specify cascade behavior: -```python -# Keep parent when child is deleted (default) +# With ordering projects = parent_to_child_relationship( - Tablenames.ORGANIZATION, + Tablenames.USER, Tablenames.PROJECT, - cascase_behaviour=CascadeBehaviour.KEEP_PARENT_ON_CHILD_DELETION, + order_by="created_at.desc()", ) -# Delete both if either is deleted +# With cascade behavior projects = parent_to_child_relationship( Tablenames.ORGANIZATION, Tablenames.PROJECT, - cascase_behaviour=CascadeBehaviour.DELETE_BOTH_IF_EITHER_IS_DELETED, -) -``` - -### Ordering - -Specify `order_by` for relationships: - -```python -projects = parent_to_child_relationship( - Tablenames.USER, - Tablenames.PROJECT, - order_by="created_at.desc()", + cascase_behaviour=CascadeBehaviour.KEEP_PARENT_ON_CHILD_DELETION, ) ``` diff --git a/.cursor/rules/enums.mdc b/.cursor/rules/enums.mdc index e6f3e6a..4d2a8f7 100644 --- a/.cursor/rules/enums.mdc +++ b/.cursor/rules/enums.mdc @@ -228,47 +228,9 @@ from .enums import ( 1. **Always inherit from `EnumKern`** for helper methods 2. 
**Use descriptive names** that clearly indicate purpose -3. **Keep values consistent** - prefer matching member names -4. **Add to `Tablenames` enum** when creating new tables -5. **Use `.value`** when storing in database columns -6. **Use enum comparisons** instead of string comparisons -7. **Document complex enums** with comments -8. **Group related enums** together in the file -9. **Use `try_parse_enum_value()`** for safe parsing -10. **Keep enum values stable** - don't change existing values (add new ones instead) +3. **Add to `Tablenames` enum** when creating new tables +4. **Use `.value`** when storing in database columns +5. **Use enum comparisons** instead of string comparisons +6. **Use `try_parse_enum_value()`** for safe parsing +7. **Keep enum values stable** - don't change existing values (add new ones instead) -## Common Patterns - -### Enum with Methods - -```python -class UserRoles(EnumKern): - ENGINEER = "ENGINEER" - EXPERT = "EXPERT" - ANNOTATOR = "ANNOTATOR" - - def is_admin(self) -> bool: - return self in [UserRoles.ENGINEER, UserRoles.EXPERT] -``` - -### Enum Validation - -```python -def validate_role(role: str) -> UserRoles: - try: - return UserRoles.from_string(role) - except ValueError: - raise ValueError(f"Invalid role: {role}") -``` - -### Enum in Type Hints - -```python -from typing import Literal - -UserRoleType = Literal[ - UserRoles.ENGINEER.value, - UserRoles.EXPERT.value, - UserRoles.ANNOTATOR.value, -] -```