LianjiaTech
diff --git a/‎api/pom.xml‎
Lines changed: 1 addition & 1 deletion b/‎api/pom.xml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎api/src/main/java/com/theokanning/openai/web/WebCrawlRequest.java‎
Lines changed: 150 additions & 0 deletions b/‎api/src/main/java/com/theokanning/openai/web/WebCrawlRequest.java‎
Lines changed: 150 additions & 0 deletions
diff --git a/‎api/src/main/java/com/theokanning/openai/web/WebCrawlResponse.java‎
Lines changed: 71 additions & 0 deletions b/‎api/src/main/java/com/theokanning/openai/web/WebCrawlResponse.java‎
Lines changed: 71 additions & 0 deletions
diff --git a/‎api/src/main/java/com/theokanning/openai/web/WebExtractRequest.java‎
Lines changed: 113 additions & 0 deletions b/‎api/src/main/java/com/theokanning/openai/web/WebExtractRequest.java‎
Lines changed: 113 additions & 0 deletions
@@ -6,7 +6,7 @@
     <parent>
         <groupId>top.bella</groupId>
         <artifactId>openai-java</artifactId>
-        <version>0.23.81</version>
+        <version>0.23.82</version>
     </parent>
     <packaging>jar</packaging>
     <artifactId>openai-api</artifactId>
 
@@ -0,0 +1,150 @@
+package com.theokanning.openai.web;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonValue;
+import com.theokanning.openai.assistants.IUssrRequest;
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import lombok.experimental.SuperBuilder;
+
+import javax.validation.constraints.Min;
+import javax.validation.constraints.NotBlank;
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * Web crawl request modeled on the Tavily Crawl API.
+ * <p>
+ * Carries the root URL plus optional depth, breadth and filtering settings for the crawl.
+ * Every field that documents a default is annotated with {@code @Builder.Default}: without it,
+ * the Lombok-generated builder ignores the field initializer and leaves the field {@code null},
+ * so the default would only apply to instances created through the no-args constructor.
+ */
+@Data
+@SuperBuilder
+@NoArgsConstructor
+public class WebCrawlRequest implements IUssrRequest, Serializable {
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * The root URL to begin the crawl (required).
+     * Example: "docs.tavily.com"
+     */
+    @NotBlank(message = "URL cannot be blank")
+    private String url;
+
+    /**
+     * Model to use for the crawl request.
+     */
+    private String model;
+
+    /**
+     * A unique identifier representing your end-user.
+     */
+    private String user;
+
+    /**
+     * Natural language instructions for the crawler.
+     * When specified, the mapping cost increases to 2 API credits per 10 successful pages
+     * instead of 1 API credit per 10 pages.
+     * Example: "Find all pages about the Python SDK"
+     */
+    private String instructions;
+
+    /**
+     * Max depth of the crawl. Defines how far from the base URL the crawler can explore.
+     * Default: 1
+     */
+    @JsonProperty("max_depth")
+    @lombok.Builder.Default
+    private Integer maxDepth = 1;
+
+    /**
+     * Max number of links to follow per level of the tree (i.e., per page).
+     * Default: 20
+     */
+    @JsonProperty("max_breadth")
+    @lombok.Builder.Default
+    private Integer maxBreadth = 20;
+
+    /**
+     * Total number of links the crawler will process before stopping.
+     * Default: 50
+     */
+    @lombok.Builder.Default
+    private Integer limit = 50;
+
+    /**
+     * Regex patterns to select only URLs with specific path patterns.
+     * Example: ["/docs/.*", "/api/v1.*"]
+     */
+    @JsonProperty("select_paths")
+    private List<String> selectPaths;
+
+    /**
+     * Regex patterns to restrict crawling to specific domains or subdomains.
+     * Example: ["^docs\\.example\\.com$"]
+     */
+    @JsonProperty("select_domains")
+    private List<String> selectDomains;
+
+    /**
+     * Regex patterns to exclude URLs with specific path patterns.
+     * Example: ["/private/.*", "/admin/.*"]
+     */
+    @JsonProperty("exclude_paths")
+    private List<String> excludePaths;
+
+    /**
+     * Regex patterns to exclude specific domains or subdomains from crawling.
+     * Example: ["^private\\.example\\.com$"]
+     */
+    @JsonProperty("exclude_domains")
+    private List<String> excludeDomains;
+
+    /**
+     * Whether to include external domain links in the final results list.
+     * Default: true
+     */
+    @JsonProperty("allow_external")
+    @lombok.Builder.Default
+    private Boolean allowExternal = true;
+
+    /**
+     * Whether to include images in the crawl results.
+     * Default: false
+     */
+    @JsonProperty("include_images")
+    @lombok.Builder.Default
+    private Boolean includeImages = false;
+
+    /**
+     * Extraction depth. Advanced extraction retrieves more data, including tables and embedded
+     * content, with higher success but may increase latency.
+     * - basic: costs 1 credit per 5 successful extractions
+     * - advanced: costs 2 credits per 5 successful extractions
+     * Default: basic
+     */
+    @JsonProperty("extract_depth")
+    @lombok.Builder.Default
+    private ExtractDepth extractDepth = ExtractDepth.BASIC;
+
+    /**
+     * The format of the extracted web page content.
+     * - markdown: returns content in markdown format
+     * - text: returns plain text and may increase latency
+     * Default: markdown
+     */
+    @lombok.Builder.Default
+    private Format format = Format.MARKDOWN;
+
+    /**
+     * Whether to include the favicon URL for each result.
+     * Default: false
+     */
+    @JsonProperty("include_favicon")
+    @lombok.Builder.Default
+    private Boolean includeFavicon = false;
+
+    /**
+     * Extraction complexity levels. Each constant is serialized as its lowercase
+     * {@code value} via {@link JsonValue}.
+     */
+    @AllArgsConstructor
+    public enum ExtractDepth {
+        BASIC("basic"),
+        ADVANCED("advanced");
+
+        private final String value;
+
+        /**
+         * @return the string written to JSON for this constant
+         */
+        @JsonValue
+        public String getValue() {
+            return value;
+        }
+    }
+
+    /**
+     * Output formats for extracted content. Each constant is serialized as its lowercase
+     * {@code value} via {@link JsonValue}.
+     */
+    @AllArgsConstructor
+    public enum Format {
+        MARKDOWN("markdown"),
+        TEXT("text");
+
+        private final String value;
+
+        /**
+         * @return the string written to JSON for this constant
+         */
+        @JsonValue
+        public String getValue() {
+            return value;
+        }
+    }
+}
@@ -0,0 +1,71 @@
+package com.theokanning.openai.web;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import lombok.experimental.SuperBuilder;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * Web crawl response modeled on the Tavily Crawl API.
+ * <p>
+ * Holds the crawled base URL, the per-page results, and request timing/identification data.
+ */
+@NoArgsConstructor
+@SuperBuilder
+@Data
+public class WebCrawlResponse implements Serializable {
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Base URL the crawl started from.
+     * Example: "docs.tavily.com"
+     */
+    @JsonProperty("base_url")
+    private String baseUrl;
+
+    /**
+     * Extracted content, one entry per crawled URL.
+     */
+    private List<CrawlResult> results;
+
+    /**
+     * Time taken to complete the request, in seconds.
+     * Example: 1.23
+     */
+    @JsonProperty("response_time")
+    private Double responseTime;
+
+    /**
+     * Unique request identifier that can be shared with customer support to help resolve
+     * issues with a specific request.
+     * Example: "123e4567-e89b-12d3-a456-426614174111"
+     */
+    @JsonProperty("request_id")
+    private String requestId;
+
+    /**
+     * A single crawled page and the content extracted from it.
+     */
+    @NoArgsConstructor
+    @SuperBuilder
+    @Data
+    public static class CrawlResult implements Serializable {
+        private static final long serialVersionUID = 1L;
+
+        /**
+         * URL of the crawled page.
+         * Example: "https://docs.tavily.com"
+         */
+        private String url;
+
+        /**
+         * Complete content extracted from the page, in the format requested
+         * (markdown or text).
+         */
+        @JsonProperty("raw_content")
+        private String rawContent;
+
+        /**
+         * Favicon URL for this result; only present when include_favicon was set to true
+         * in the request.
+         * Example: "https://mintlify.s3-us-west-1.amazonaws.com/tavilyai/_generated/favicon/apple-touch-icon.png?v=3"
+         */
+        private String favicon;
+    }
+}
@@ -0,0 +1,113 @@
+package com.theokanning.openai.web;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonValue;
+import com.theokanning.openai.assistants.IUssrRequest;
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import lombok.experimental.SuperBuilder;
+
+import javax.validation.constraints.NotEmpty;
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * Web extract request modeled on the Tavily Extract API.
+ * Extracts web page content from one or more specified URLs.
+ * <p>
+ * Every field that documents a default is annotated with {@code @Builder.Default}: without it,
+ * the Lombok-generated builder ignores the field initializer and leaves the field {@code null},
+ * so the default would only apply to instances created through the no-args constructor.
+ */
+@Data
+@SuperBuilder
+@NoArgsConstructor
+public class WebExtractRequest implements IUssrRequest, Serializable {
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * The URLs to extract content from (required).
+     * Example: ["https://en.wikipedia.org/wiki/Artificial_intelligence"]
+     */
+    @NotEmpty(message = "URLs cannot be empty")
+    private List<String> urls;
+
+    /**
+     * Model to use for the extract request.
+     */
+    private String model;
+
+    /**
+     * A unique identifier representing your end-user.
+     */
+    private String user;
+
+    /**
+     * Include a list of images extracted from the URLs in the response.
+     * Default: false
+     */
+    @JsonProperty("include_images")
+    @lombok.Builder.Default
+    private Boolean includeImages = false;
+
+    /**
+     * Whether to include the favicon URL for each result.
+     * Default: false
+     */
+    @JsonProperty("include_favicon")
+    @lombok.Builder.Default
+    private Boolean includeFavicon = false;
+
+    /**
+     * The depth of the extraction process.
+     * - basic: costs 1 credit per 5 successful URL extractions
+     * - advanced: costs 2 credits per 5 successful URL extractions
+     * Default: basic
+     */
+    @JsonProperty("extract_depth")
+    @lombok.Builder.Default
+    private ExtractDepth extractDepth = ExtractDepth.BASIC;
+
+    /**
+     * The format of the extracted web page content.
+     * - markdown: returns content in markdown format
+     * - text: returns plain text and may increase latency
+     * Default: markdown
+     */
+    @lombok.Builder.Default
+    private Format format = Format.MARKDOWN;
+
+    /**
+     * Maximum time in seconds to wait for the URL extraction before timing out.
+     * Must be between 1.0 and 60.0 seconds (this range is not enforced by this class).
+     * If not specified, default timeouts are applied based on extract_depth:
+     * - 10 seconds for basic extraction
+     * - 30 seconds for advanced extraction
+     */
+    private Double timeout;
+
+    /**
+     * Extraction complexity levels. Each constant is serialized as its lowercase
+     * {@code value} via {@link JsonValue}.
+     */
+    @AllArgsConstructor
+    public enum ExtractDepth {
+        BASIC("basic"),
+        ADVANCED("advanced");
+
+        private final String value;
+
+        /**
+         * @return the string written to JSON for this constant
+         */
+        @JsonValue
+        public String getValue() {
+            return value;
+        }
+    }
+
+    /**
+     * Output formats for extracted content. Each constant is serialized as its lowercase
+     * {@code value} via {@link JsonValue}.
+     */
+    @AllArgsConstructor
+    public enum Format {
+        MARKDOWN("markdown"),
+        TEXT("text");
+
+        private final String value;
+
+        /**
+         * @return the string written to JSON for this constant
+         */
+        @JsonValue
+        public String getValue() {
+            return value;
+        }
+    }
+}