presidio-oss
diff --git a/‎.changeset/tangy-trees-train.md‎
Lines changed: 0 additions & 5 deletions b/‎.changeset/tangy-trees-train.md‎
Lines changed: 0 additions & 5 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 12 additions & 2 deletions b/‎CHANGELOG.md‎
Lines changed: 12 additions & 2 deletions
diff --git a/‎backend/src/controllers/chatController.ts‎
Lines changed: 1 addition & 1 deletion b/‎backend/src/controllers/chatController.ts‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎backend/src/controllers/exploreController.ts‎
Lines changed: 1 addition & 1 deletion b/‎backend/src/controllers/exploreController.ts‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎backend/src/prompts/app-doc-generator.prompt.ts‎
Lines changed: 2 additions & 2 deletions b/‎backend/src/prompts/app-doc-generator.prompt.ts‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎backend/src/prompts/explore-mode.prompt.ts‎
Lines changed: 133 additions & 37 deletions b/‎backend/src/prompts/explore-mode.prompt.ts‎
Lines changed: 133 additions & 37 deletions
diff --git a/‎backend/src/services/HistoryStorageService.ts‎
Lines changed: 22 additions & 2 deletions b/‎backend/src/services/HistoryStorageService.ts‎
Lines changed: 22 additions & 2 deletions
diff --git a/‎backend/src/services/implementations/puppeteer/PuppeteerActions.ts‎
Lines changed: 8 additions & 0 deletions b/‎backend/src/services/implementations/puppeteer/PuppeteerActions.ts‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎backend/src/services/implementations/puppeteer/PuppeteerService.ts‎
Lines changed: 22 additions & 10 deletions b/‎backend/src/services/implementations/puppeteer/PuppeteerService.ts‎
Lines changed: 22 additions & 10 deletions
@@ -1,5 +1,15 @@
 # Changelog
 
+## 1.3.3
+
+### Patch Changes
+
+- Enhanced explore mode chat with significant bug fixes and stability improvements.
+
+  - Includes enhanced graph rendering, UI fixes, better LLM prompting, and direct URL extraction. Also fixes bugs related to image display, node handling, and context management, alongside general performance and stability enhancements.
+
+- Fix complete task description not rendering and handle docker launch errors
+
 ## 1.3.2
 
 ### Minor Changes
@@ -8,8 +18,8 @@
 
 ### Patch Changes
 
-- 008dcc5: Add a wait for the 'domcontentloaded' state after performing a click action to ensure the page is fully loaded.
-- 416cd9a: Update image output to common format - added wait time for each action in the puppeteer - remove auto launch scripts from vnc and revert to LLM based actions to work on VNC
+- Add a wait for the 'domcontentloaded' state after performing a click action to ensure the page is fully loaded.
+- Update image output to common format - added wait time for each action in the puppeteer - remove auto launch scripts from vnc and revert to LLM based actions to work on VNC
 
 ## 1.3.0
 
 
@@ -21,7 +21,7 @@ export class ChatController {
 
       // Always reset and recreate the provider with the correct mode to prevent context bleed
       const requestedMode = req.query.mode as Modes || Modes.REGRESSION;
-      ChatService.resetProvider();
+
       ChatService.createProvider(requestedMode);
       console.log(`Chat provider created with mode: ${requestedMode}`);
 
 
@@ -15,7 +15,7 @@ export class ExploreController {
   ): Promise<void> {
     try {
       // Always reset and recreate the provider with explore mode to prevent context bleed
-      ChatService.resetProvider();
+
       ChatService.createProvider(Modes.EXPLORE);
       console.log("Explore provider created with mode: EXPLORE");
       // Get data from request body
 
@@ -1,8 +1,8 @@
 export const appDocumentationGeneratorPrompt = `
 # Application Documentation Generator
 
-You are an expert Application Documentation Generator with deep expertise in frontend engineering, UI/UX design, and technical documentation. Your task is to thoroughly analyze the provided application (web, mobile, or desktop) and create detailed documentation that would enable another AI to recreate the application with high fidelity.
-Important: You should navigate to all the possible different links/sections/flows provided and explore the application thoroughly and systematically to understand its structure, components, features, and user flows. Your documentation should be comprehensive, covering all major sections, features, and user interactions (e.g) If there are multiple links in header or footer, you should explore all of them.
+You are an expert Application UI/UX Documentation Generator with deep expertise in frontend engineering, UI/UX design, and technical documentation. As a perfectionist with OCD issues, your only task is to thoroughly analyze the provided screenshot (web, mobile, or desktop) and create detailed documentation that would enable another AI to recreate the application with high fidelity.
+IMPORTANT: Your documentation should be comprehensive, covering all major sections, features, and user interactions (e.g) If there are multiple links in header or footer, you should explore all of them. DON'T SAY ANYTHING ELSE. JUST DOCUMENT THE APPLICATION AS PER BELOW FORMAT. IF YOU NEED MORE INFORMATION JUST ADD A NOTE AT THE BOTTOM OF THE DOCUMENTATION TO LET ME KNOW.
 
 ## Analysis Approach
 
 
@@ -1,23 +1,59 @@
-const performActionPrompt = `You are FactifAI explore Agent with extensive experience in working with web applications and computer.
-You are exploring web/desktop/mobile application here. 
-Your duty is to perform the Task given by taking logical actions with the tools provided. 
-On completing the given Task you have to use the complete_task tool to present the result of your work to the user.
-
-DOCUMENTATION REQUIREMENT: For EACH feature or element you explore, you MUST take a screenshot AFTER navigating to it or clicking on it. This screenshot must be saved to document the feature for later analysis.
-
-Do not hallucinate on the elements or buttons. You should have 100% visual confirmation for each element.
-
-you have set of tools to use.
-
-# Tool Use Formatting
-
-Tool use is formatted using XML-style tags. The tool name is enclosed in opening and closing tags, and each parameter is similarly enclosed within its own set of tags. Here's the structure:
-
-<tool_name>
-<parameter1_name>value1</parameter1_name>
-<parameter2_name>value2</parameter2_name>
-...
-</tool_name>
+const performActionPrompt = `You are FactifAI Explorer Agent, specialized in systematically exploring web applications for UI cloning purposes.
+
+Your mission is to thoroughly explore web/desktop/mobile applications by:
+1. Documenting the initial state of each page upon arrival
+2. Systematically exploring ALL elements on the current page
+3. Generating complete documentation BEFORE any action that might navigate to a new page
+4. Using complete_task to record your documentation before page transitions
+
+# CRITICAL RULE: TOOL SEPARATION
+- NEVER use perform_action and complete_task in the same message
+- When calling complete_task, it MUST be the ONLY tool used in that message
+- After using complete_task, wait for user confirmation before your next action
+- Separate documentation (complete_task) and interaction (perform_action) into different messages
+
+# SCREENSHOT COMPARISON & PAGE AWARENESS
+- ALWAYS compare the current screenshot with the previous one and watch for page URL changes
+- Identify and note ALL differences between screenshots after each action
+- Maintain awareness of visual context throughout the entire exploration
+
+# Exploration Process (CRITICAL TO FOLLOW)
+1. INITIAL ASSESSMENT: When arriving at a new page
+   - Compare with previous screenshot to confirm page transition
+   - Document the page in its initial state
+   - Identify all visible UI elements and their positions
+
+2. THOROUGH EXPLORATION: Explore current page completely
+   - Interact with non-navigational elements first (forms, buttons that don't navigate)
+   - Scroll entire page to discover all elements
+   - Document all UI components and their behaviors
+
+3. PRE-NAVIGATION DOCUMENTATION: Before potential page transitions
+   - IMPORTANT: Call complete_task BEFORE clicking any link or button that might navigate to a new page
+   - Document your complete understanding of the current page
+   - Only after documentation is complete should you proceed with navigation
+
+# SMART EXPLORATION STRATEGY
+- Focus on documenting UNIQUE UI COMPONENTS rather than exploring every page
+- Recognize pattern-based content (e.g., product listings, search results) and explore only representative examples
+- For repeated UI patterns (e.g., product cards in an e-commerce site):
+  1. Document ONE or TWO examples thoroughly to understand the component pattern
+  2. Avoid exploring every instance of the same component pattern
+  3. Note variations in the pattern, if any exist
+- Identify and prioritize exploration of:
+  1. Primary navigation patterns and menus
+  2. Core user flows (e.g., login, search, checkout)
+  3. Unique interactive components (e.g., custom date pickers, filters)
+  4. Different page templates (e.g., home, category, product, account pages)
+- Once a component pattern is documented, mark it as "explored" and avoid documenting similar instances
+- Focus on breadth of component coverage rather than exhaustive exploration of all content
+
+Example strategy for e-commerce:
+- Document main navigation and header/footer only once
+- Explore one category page to document the category template
+- Explore only 1-2 product pages to document the product template
+- Document one instance of the checkout flow
+- Note any unique UI components that differ from common patterns
 
 # Tools
 ## perform_action
@@ -62,34 +98,44 @@ Common Actions (Both Sources):
     * scroll_down/scroll_up: Scroll the viewport.
         - Use when elements are partially or fully obscured.
         - Always verify element visibility after scrolling.
-        - Aim to fully reveal the target element.
+        - Scroll repeatedly to ensure you've seen ALL elements on the page.
+        - Always scroll to both the top and bottom of each page to ensure complete coverage.
         
 ## complete_task: 
-- Use this tool when the given task is completed. 
-- Do not use this tool with any other tool.
-Usage: <complete_task><task_status>exploration complete</task_status><additional_info>any information/description you want to provide</additional_info></complete_task>
+- CRITICAL: This tool MUST be used ALONE - never with perform_action in the same message
+- Use when you have gained comprehensive knowledge of the current page
+- Always document your understanding before page transitions
+- Call this tool before clicking links, navigation buttons, or submitting forms that might change pages
+
+Usage: <complete_task><task_status>Initiating document generation for current page</task_status><additional_info>
+Key information to be listed in short way:
+UI components: [minimal list of elements]
+page information: [minimal notes]
+</additional_info></complete_task>
 
 Important Notes:
 - Puppeteer: Must start with 'launch' if no screenshot exists
-- Docker: Always analyze screenshot first, no 'launch' action needed
+- Docker: Always analyze screenshot first, no 'launch' action needed. NEVER FOCUS ON EXPLORING FIREFOX BROWSER FEATURES JUST FOCUS ON THE WEB PAGE ONLY.
 - Strictly use only one action per response and wait for the "Action Result" before proceeding.
-
+- NEVER combine complete_task with perform_action - they must be in separate messages
 
 Usage:
 <perform_action>
 <action>Action to perform (e.g., launch, doubleClick, click, type, scroll_down, scroll_up, keyPress)</action>
 <url>URL to launch the browser at (optional)</url>
 <coordinate>x,y coordinates (optional)</coordinate>
 <text>Text to type (optional)</text>
-<about_this_action>Give a description about the action and why it needs to be performed. Description should be short and concise and usable for testcase generation.
-    (e.g. Click Login Button)
+<about_this_action>Give a description about the action and why it needs to be performed. For potentially navigation-triggering actions, mention that documentation has been completed in a previous message.
+    (e.g. Click Login Button. Documentation of current page was completed in previous message.)
 </about_this_action>
 </perform_action>
 
 Important Notes:
-- Puppeteer: Must start with 'launch' if no screenshot exists
-- Docker: Always analyze screenshot first, no 'launch' action needed
+- Puppeteer: Must start with 'launch' action first regardless of the existence of a screenshot. No excuses.
+- Docker: No 'launch' action needed. Always start fresh by typing in the given website URL in the URL bar and start the exploration, if you see existing webpage, close it and start fresh by typing the new url.
 - Strictly use only one action per response and wait for the "Action Result" before proceeding.
+- Always close the browser popups and alerts and focus on the site content only. This is important for taking screenshots and exploring the site.
+- NEVER combine perform_action with complete_task - they must be in separate messages (IMPORTANT)
 
 
 Source-Specific Actions:
@@ -111,8 +157,40 @@ Source-specific information:
   Puppeteer Only:
     * Viewport size: 1280x720
 
-Make sure you understand the Environment Context. If the source is not provided, assume the default is Docker.
-`;
+# AVOIDING REDUNDANT DOCUMENTATION
+- Do NOT re-document a page if no new features or interactions are discovered
+- Once a page has been thoroughly explored and documented, avoid redundant documentation of the same elements
+- Only trigger the documentation process again if:
+  1. You discover previously hidden or overlooked elements
+  2. User interactions reveal new functionality 
+  3. Content dynamically changes in a significant way
+- If you've thoroughly explored a page and find nothing new, proceed with the exploration instead of documenting the page again
+
+# NAVIGATION VS NON-NAVIGATION ELEMENTS
+Before interacting with elements, classify them as:
+1. Non-navigation elements - explore these FIRST:
+   - Form fields (text inputs, checkboxes, radio buttons)
+   - Buttons that trigger actions on the same page
+   - Dropdowns that don't navigate
+   - Tab panels that change content within the same page
+   - Modals and dialogs
+
+2. Navigation elements - explore these ONLY AFTER documentation is complete:
+   - Links to other pages
+   - Navigation menus
+   - "Next" or "Continue" buttons
+   - Form submit buttons that direct to new pages
+   - Login/logout buttons
+
+CRITICAL SEQUENCE FOR NAVIGATION:
+1. Explore all non-navigation elements first
+2. In a separate message, call ONLY complete_task to document the page
+3. After receiving confirmation, use perform_action to navigate in a new message
+4. Before clicking ANY navigation element, ALWAYS call complete_task to document your current page knowledge.
+
+Make sure you understand the Environment Context. If the source is not provided, assume the default is Docker and double click to open firefox in docker.
+
+Remember: NEVER combine complete_task and perform_action in the same message. Always separate documentation and interaction into different messages. Generate complete documentation BEFORE any action that might navigate to a new page. This ensures each page is thoroughly documented before transitions occur. This is enormously important.`;
 
 export const exploreModePrompt = `You are FactifAI explore Agent with extensive experience in working with web applications and computer.
 You are exploring web/desktop/mobile application here. 
@@ -121,9 +199,21 @@ Clickable elements are elements that can cause any redirection or action on the
 
 Do not hallucinate on the elements or buttons. You should have 100% visual confirmation for each element.
 
+# IMPORTANT: URL DETECTION (ONLY ON DOCKER SOURCE RUNNING FIREFOX)
+When analyzing screenshots that show Firefox in docker once exploration starts:
+- Exploration starts once you type in the given URL and access the site for the first time.
+- Look for the address bar at the top of the browser window
+- Identify and read the current URL displayed in the address bar
+- Include the exact URL in your response using the <current_url> tag
+- If the address bar is not visible or the URL is partially obscured, indicate this in your response
+- The URL should be complete, including protocol (http:// or https://)
+
+# VERY IMPORTANT
+- All the firefox browser buttons like back, forward, refresh, home, etc. are not clickable elements. Do not consider them as clickable elements for exploration.
 
 # Output Format
 <explore_output>
+<current_url>https://example.com/current/path</current_url>
 <clickable_element>
 <text></text>
 <coordinates></coordinates>
@@ -133,6 +223,7 @@ Do not hallucinate on the elements or buttons. You should have 100% visual confi
 
 # Usage
 <explore_output>
+<current_url>https://example.com/login</current_url>
 <clickable_element>
 <text>login</text>
 <coordinates>124, 340</coordinates>
@@ -149,8 +240,13 @@ Do not hallucinate on the elements or buttons. You should have 100% visual confi
 export const getPerformActionPrompt = (
   source: string,
   task: string,
-  pageUrl: string,
-) =>
-  `${performActionPrompt}\n Environment Context: ${source}\n
-  Task: ${task} \n
-  `;
+  currentPageUrl?: string
+) => {
+  let prompt = `${performActionPrompt}\n Environment Context: ${source}\n Task: ${task}`;
+
+  if (currentPageUrl) {
+    prompt += `\n CURRENT PAGE URL: ${currentPageUrl}`;
+  }
+
+  return prompt;
+};
@@ -46,7 +46,27 @@ export class HistoryStorageService {
     try {
       await this.initialize();
       const data = await readFile(SESSIONS_LIST_FILE, 'utf8');
-      return JSON.parse(data);
+      
+      // Handle empty file case
+      if (!data || data.trim() === '') {
+        console.warn('Sessions list file is empty');
+        return [];
+      }
+      
+      try {
+        return JSON.parse(data);
+      } catch (parseError) {
+        console.error('Error parsing sessions list JSON:', parseError);
+        
+        // Create a backup of the corrupted file for debugging
+        const backupPath = `${SESSIONS_LIST_FILE}.backup.${Date.now()}`;
+        await writeFile(backupPath, data);
+        console.warn(`Created backup of corrupted sessions list at ${backupPath}`);
+        
+        // Return empty array and reset the file with empty array
+        await writeFile(SESSIONS_LIST_FILE, JSON.stringify([]));
+        return [];
+      }
     } catch (error) {
       console.error('Error reading sessions list:', error);
       return [];
@@ -163,4 +183,4 @@ export class HistoryStorageService {
       throw new Error('Failed to migrate from localStorage');
     }
   }
-}
+}
@@ -516,6 +516,14 @@ export class PuppeteerActions {
     return await PuppeteerActions.puppeteerService.captureScreenshotAndInfer();
   }
 
+  static async isBrowserReady(): Promise<boolean> {
+    if (!PuppeteerActions.puppeteerService) {
+      console.log("PuppeteerService is not initialized");
+      return false;
+    }
+    return await PuppeteerActions.puppeteerService.hasBrowserInstance();
+  }
+
   static async getCurrentUrl() {
     return await PuppeteerActions.puppeteerService.getCurrentUrl();
   }
 
@@ -83,7 +83,6 @@ export class PuppeteerService extends BaseStreamingService {
       PuppeteerService.browser = await chromium.launch({
         headless: true,
         args: [
-          '--disable-gpu',              // Disable GPU hardware acceleration
           '--disable-dev-shm-usage',    // Overcome limited resource problems
           '--disable-setuid-sandbox',   // Disable setuid sandbox (safety feature)
           '--no-sandbox',               // Disable sandbox for better performance
@@ -765,18 +764,31 @@ export class PuppeteerService extends BaseStreamingService {
   }
 
   async getCurrentUrl(): Promise<string> {
+    // Check if browser is available and return a safe default if not
     if (!PuppeteerService.page) {
-      throw new Error("Browser not launched");
+      console.log("Warning: Browser not launched when getting URL, returning empty string");
+      return "";
     }
-    let url = PuppeteerService.page.url();
-    console.log("===", url);
-    if (!url) {
-      await PuppeteerService.page.evaluate(() => {
-        url = window.location.href;
-        console.log("===>>>", url);
-      });
+    
+    try {
+      let url = PuppeteerService.page.url();
+      console.log("Current URL:", url);
+      
+      // Only try to evaluate if we couldn't get the URL and the page is available
+      if (!url && PuppeteerService.page) {
+        try {
+          url = await PuppeteerService.page.evaluate(() => window.location.href);
+          console.log("URL from evaluate:", url);
+        } catch (evalError) {
+          console.log("Error getting URL from evaluate:", evalError);
+        }
+      }
+      
+      return url || "";
+    } catch (error) {
+      console.log("Error getting current URL:", error);
+      return "";
     }
-    return url;
   }
 
   /**
Original file line number	Diff line number	Diff line change
`@@ -516,6 +516,14 @@ export class PuppeteerActions {`
`516`	`516`	`return await PuppeteerActions.puppeteerService.captureScreenshotAndInfer();`
`517`	`517`	`}`
`518`	`518`
	`519`	`+ static async isBrowserReady(): Promise<boolean> {`
	`520`	`+ if (!PuppeteerActions.puppeteerService) {`
	`521`	`+ console.log("PuppeteerService is not initialized");`
	`522`	`+ return false;`
	`523`	`+ }`
	`524`	`+ return await PuppeteerActions.puppeteerService.hasBrowserInstance();`
	`525`	`+ }`
	`526`	`+`
`519`	`527`	`static async getCurrentUrl() {`
`520`	`528`	`return await PuppeteerActions.puppeteerService.getCurrentUrl();`
`521`	`529`	`}`