diff --git a/.gitignore b/.gitignore index fe32cff..0e50918 100644 --- a/.gitignore +++ b/.gitignore @@ -15,24 +15,18 @@ output.xml log.html report.html pabot_results/ -*.png *diagnosis.json .robotframework/ # Salesforce CLI .sf/ .sfdx/ -org_info.json deploy-options.json *.auth.json # Downloads & temp -downloads/* -!downloads/.gitkeep output/* !output/.gitkeep -chromedriver.exe -chromedriver .pabotsuitenames @@ -54,10 +48,6 @@ Desktop.ini *.swp *.swo -# Generated docs/files from runs (if created at root) -CDL_DOC -CV_DOC -smoke_doc # UUID-like run folders that may end up at repo root ???????????????????????????????? \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c0206ff..705b0e4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -44,7 +44,7 @@ sf org login web Before submitting changes, ensure all tests pass: ```bash -robot src/robot/tests/ +robot src/robot/orchestrator/ ``` --- diff --git a/README.md b/README.md index fa50716..ca25603 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,10 @@ Native Salesforce tools have limitations: This design ensures predictable, scalable, and observable execution. +

+ +

+ ## Technology Stack - Robot Framework @@ -121,7 +125,7 @@ pip install -r requirements.txt ``` 2. Run: ```bash - robot -d results --variable ORG_ALIAS: src/robot/tests/Test.robot + robot -d results --variable ORG_ALIAS: src/robot/orchestrator/scan.robot ``` 3. Check outputs: ```text @@ -134,8 +138,8 @@ pip install -r requirements.txt ``` salesforce-objects-scanner/ -├── output/ # Generated JSON + Excel reports -├── results/ # Robot execution logs +├── output/ # Generated JSON + Excel reports +├── results/ # Robot execution logs │ ├── log.html │ ├── output.xml │ └── report.html @@ -143,11 +147,11 @@ salesforce-objects-scanner/ │ └── robot/ │ ├── libraries/ │ │ └── ExcelWriter.py -│ └── tests/ -│ └── Test.robot -├── Support.robot # Core logic +│ ├── orchestrator/ +│ │ └── scan.robot +│ └── resources/ +│ └── keywords.robot # Core logic ├── .gitignore -├── .pabotsuitenames # Pabot suite cache file ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── README.md diff --git a/ci/robot/smoke.robot b/ci/robot/smoke.robot index 02c3280..385363f 100644 --- a/ci/robot/smoke.robot +++ b/ci/robot/smoke.robot @@ -4,7 +4,7 @@ Library OperatingSystem Library Collections Library BuiltIn Library String -Resource ../../src/robot/tests/Support.robot +Resource ../../src/robot/resources/keywords.robot *** Keywords *** Is Windows diff --git a/docs/architecture.md b/docs/architecture.md index 6505864..e39a04c 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -2,10 +2,242 @@ ## Overview -# sf-org-object-scanner +The **Salesforce Objects Scanner Tool** is a Robot Framework–based automation solution designed to analyze Salesforce org data footprint by retrieving record counts across all queryable objects. -sf-org-object-scanner: A Salesforce CLI utility to scan all queryable sObjects in an org and retrieve record counts per object. Ideal for data volume analysis, storage optimization, LDV risk identification, org health checks, migration planning, and cleanup prioritization. 
+The architecture focuses on **safe execution, structured outputs, and scalability**, making it suitable for large Salesforce environments with hundreds to thousands of objects. -**Author:** Bhimeswara Vamsi Punnam +The solution combines: +- Salesforce CLI (`sf`) for metadata discovery and query execution +- Robot Framework for orchestration and control flow +- Process-based execution with timeout safeguards +- Structured JSON outputs and Excel reporting for analysis -**Role:** Lead SDET / Automation Architect +--- + +## Why This Architecture + +Salesforce does not provide a single unified way to retrieve record counts across all objects efficiently. + +Key challenges: +- Large number of objects (standard + custom + tooling) +- Some objects require filters or are not queryable +- Long-running queries can block execution + +This architecture addresses these challenges by: + +- Using Salesforce CLI for consistent and authenticated query execution +- Applying timeout protection to prevent long-running failures +- Classifying skipped objects for transparency +- Generating structured outputs for downstream analysis + +--- + +## High-Level Architecture + +### Architecture Breakdown + +The system follows a layered execution model: + +

+ +

+ +--- + +### Control Layer (Salesforce CLI) + +- Uses `sf sobject list --json` to discover queryable objects +- Executes `SELECT COUNT()` queries via CLI +- Handles authentication using Salesforce CLI session + +--- + +### Orchestration Layer (Robot Framework) + +- Coordinates full scan workflow +- Applies filtering logic for unsupported objects +- Keeps execution deterministic (failed queries are not retried) +- Manages logging and reporting + +--- + +### Execution Layer + +- Executes queries per object +- Applies per-query timeout protection +- Tracks execution duration +- Ensures controlled and predictable runtime + +--- + +### Output Layer + +- JSON artifacts: + - `data.json` + - `tooling.json` + - `skipped.json` + - `durations.json` +- Excel report: + - `SF_Objects_.xlsx` + +--- + +## Repository Structure + +``` +salesforce-objects-scanner/ +├── output/ # Generated JSON + Excel reports +├── results/ # Robot execution logs +│ ├── log.html +│ ├── output.xml +│ └── report.html +├── src/ +│ └── robot/ +│ ├── libraries/ +│ │ └── ExcelWriter.py +│ ├── orchestrator/ +│ │ └── scan.robot +│ └── resources/ +│ └── keywords.robot # Core logic +├── .gitignore +├── CODE_OF_CONDUCT.md +├── CONTRIBUTING.md +├── README.md +├── requirements.txt +└── SECURITY.md + +``` + + +--- + +## Folder Responsibilities + +- **docs/** – Architecture and design documentation +- **src/robot/** – Core test suites and libraries +- **output/** – Generated JSON and Excel reports +- **results/** – Robot Framework logs and reports +- **ci/** – CI test suites + +--- + +## Execution Model + +### Authentication + +- Managed via Salesforce CLI (`sf org login web`) +- No credentials stored in code +- Session-based authentication reused across commands + +--- + +### Object Discovery + +- Retrieve all objects via CLI +- Filter: + - Non-queryable objects + - Unsupported types + - Known noisy patterns + +--- + +### Query Execution Flow + +1. Discover objects +2. Filter unsupported objects +3. 
Execute `SELECT COUNT()` per object +4. Apply timeout control +5. Capture success or skip reason +6. Track execution duration +7. Persist results + +--- + +## Failure and Handling Model + +- Objects that fail are classified into: + - `COUNT_NOT_SUPPORTED` + - `REQUIRES_WHERE` + - `INVALID_TYPE` +- No retry amplification (predictable execution) +- Failures are recorded in `skipped.json` +- Execution continues without interruption + +--- + +## Security Architecture + +- Authentication delegated to Salesforce CLI +- No credentials stored in repository +- Uses existing authenticated sessions +- Sensitive files excluded via `.gitignore` + +--- + +## Runtime vs Source Separation + +| Category | Location | Notes | +|----------------|-------------|--------------------------------| +| Source code | `src/robot/`| Version-controlled | +| Outputs | `output/` | Generated at runtime | +| Reports | `results/` | Execution logs | +| CI tests | `ci/` | Smoke test automation | +| Documentation | `docs/` | Architecture & design | + +--- + +## Design Principles + +- Deterministic execution (no retries) +- Timeout-controlled processing +- Clear separation of concerns +- Structured and traceable outputs +- Scalable for large orgs +- CLI-based authentication (no secrets in code) +- CI/CD compatible and headless execution ready + +--- + +## Scalability Considerations + +- Handles hundreds to thousands of objects +- Sequential execution ensures stability +- Future-ready for parallel execution (Pabot) +- Performance depends on: + - Org size + - Network latency + - Query response time + +--- + +## Extensibility + +The framework can be extended with: + +- Additional filters for object classification +- Parallel execution support (Pabot) +- Custom analytics on output data +- Integration with dashboards or databases + +--- + +## Observability and Monitoring + +- Robot Framework HTML reports (`log.html`, `report.html`) +- JSON outputs for structured analysis +- Execution duration tracking per 
object +- Clear success vs skip visibility + +--- + +## Deployment Model + +- Local developer environments +- CI/CD pipelines (GitHub Actions, Jenkins) +- Headless execution environments +- Containerized environments (future scope) + +--- + +**Author:** Bhimeswara Vamsi Punnam +**Role:** Lead Software Development Engineer in Test (SDET) diff --git a/docs/architecture.png b/docs/architecture.png new file mode 100644 index 0000000..dd0cffc Binary files /dev/null and b/docs/architecture.png differ diff --git a/src/robot/tests/Test.robot b/src/robot/orchestrator/scan.robot similarity index 59% rename from src/robot/tests/Test.robot rename to src/robot/orchestrator/scan.robot index f4931c3..f8b48f7 100644 --- a/src/robot/tests/Test.robot +++ b/src/robot/orchestrator/scan.robot @@ -1,6 +1,7 @@ *** Settings *** Documentation Retrieve and log record counts for Salesforce objects from Excel list -Resource Support.robot +Resource ../resources/keywords.robot +Suite Teardown Cleanup Suite *** Test Cases *** Object_Scanner diff --git a/src/robot/tests/Support.robot b/src/robot/resources/keywords.robot similarity index 96% rename from src/robot/tests/Support.robot rename to src/robot/resources/keywords.robot index 997fcff..ed0da02 100644 --- a/src/robot/tests/Support.robot +++ b/src/robot/resources/keywords.robot @@ -45,6 +45,7 @@ ${DISCOVER_TOOLING_OBJECTS} ${TRUE} @{COUNT_NOT_SUPPORTED_OBJECTS} DataEncryptionKey # Known objects that require additional WHERE clause (avoid wasting time; classify clearly) @{REQUIRES_WHERE_OBJECTS} DataStatistics +@{TEMP_FILES} PIPE log.html output.xml report.html *** Keywords *** Check Prerequisites @@ -523,3 +524,22 @@ Save Results To Excel Log To Console ${result.stdout} Log To Console ${result.stderr} Should Be Equal As Integers ${result.rc} 0 Excel generation failed:\n${result.stdout}\n${result.stderr} + +Cleanup Runtime Artifacts + [Documentation] Removes leftover run files from the execution directory: entries named in TEMP_FILES and files with 32-character lowercase-hex names. Directories are left untouched. 
+ ${items}= List Directory ${EXECDIR} + FOR ${item} IN @{items} + ${full_path}= Set Variable ${EXECDIR}${/}${item} + ${is_uuid}= Evaluate len($item) == 32 and all(c in "0123456789abcdef" for c in $item) + ${is_known_temp}= Evaluate $item in $TEMP_FILES + IF ${is_uuid} or ${is_known_temp} + ${is_file}= Run Keyword And Return Status File Should Exist ${full_path} + IF ${is_file} + Log Removing temp file: ${item} + Remove File ${full_path} + END + END + END + +Cleanup Suite + Cleanup Runtime Artifacts