Sixt
diff --git a/go.mod b/go.mod
Lines changed: 2 additions & 0 deletions
diff --git a/go.sum b/go.sum
Lines changed: 4 additions & 0 deletions
diff --git a/opt.go b/opt.go
Lines changed: 48 additions & 0 deletions
diff --git a/opt_test.go b/opt_test.go
Lines changed: 81 additions & 0 deletions
diff --git a/parse_get.go b/parse_get.go
Lines changed: 48 additions & 0 deletions
diff --git a/parse_parse.go b/parse_parse.go
Lines changed: 116 additions & 0 deletions
@@ -1,3 +1,5 @@
 module github.com/sixt/tensorlake-go
 
 go 1.25
+
+require github.com/google/jsonschema-go v0.4.2
@@ -0,0 +1,4 @@
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/google/jsonschema-go v0.4.2 h1:tmrUohrwoLZZS/P3x7ex0WAVknEkBZM46iALbcqoRA8=
+github.com/google/jsonschema-go v0.4.2/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE=
@@ -0,0 +1,48 @@
+// Copyright 2025 SIXT SE
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tensorlake
+
+import (
+	"encoding/json"
+	"fmt"
+)
+
+// UnionValues holds one or more values of type T decoded from a JSON field
+// that may be encoded either as a single value or as an array of values.
+//
+// Regardless of the form it was decoded from, it is always marshaled as a
+// JSON array.
+type UnionValues[T any] []T
+
+// UnmarshalJSON decodes b, which may be a JSON array of T, a single T, or
+// null, into v.
+//
+// The array form is tried first so that array input is never captured as a
+// single element when T itself can hold an array (for example T = any). The
+// single-value form is the fallback, which also covers a T that is itself
+// array-shaped. A JSON null resets v to nil instead of producing a
+// one-element slice holding the zero value of T.
+//
+// When neither form matches, the returned error wraps both decode errors.
+func (v *UnionValues[T]) UnmarshalJSON(b []byte) error {
+	// Try an array of values. A JSON null also succeeds here and leaves
+	// arr nil.
+	var arr []T
+	arrErr := json.Unmarshal(b, &arr)
+	if arrErr == nil {
+		*v = arr
+		return nil
+	}
+
+	// Fall back to a single value.
+	var single T
+	singleErr := json.Unmarshal(b, &single)
+	if singleErr == nil {
+		*v = []T{single}
+		return nil
+	}
+
+	return fmt.Errorf("value must be a single value or an array of values: %s (as array: %w; as single value: %w)", b, arrErr, singleErr)
+}
+
+// MarshalJSON marshals a UnionValues into a JSON array. A nil UnionValues is
+// encoded as null, like any nil slice.
+func (v UnionValues[T]) MarshalJSON() ([]byte, error) {
+	return json.Marshal([]T(v))
+}
@@ -0,0 +1,81 @@
+// Copyright 2025 SIXT SE
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tensorlake
+
+import (
+	"encoding/json"
+	"reflect"
+	"testing"
+)
+
+// TestValueOrValuesUnmarshalJSON checks that UnionValues decodes both the
+// single-value and the array JSON forms, and rejects input that is neither.
+// Each case runs as its own subtest so one failure does not hide the others.
+func TestValueOrValuesUnmarshalJSON(t *testing.T) {
+	tests := []struct {
+		name     string
+		value    string
+		expected []int
+		wantErr  bool
+	}{
+		{
+			name:     "single value",
+			value:    "1",
+			expected: []int{1},
+		},
+		{
+			name:     "array of values",
+			value:    "[1, 2, 3]",
+			expected: []int{1, 2, 3},
+		},
+		{
+			name:    "wrong value type",
+			value:   `"abc"`,
+			wantErr: true,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			var v UnionValues[int]
+			err := json.Unmarshal([]byte(test.value), &v)
+			if test.wantErr {
+				if err == nil {
+					t.Fatalf("expected an error for %s, got %v", test.value, v)
+				}
+				return
+			}
+			if err != nil {
+				t.Fatalf("failed to unmarshal: %v", err)
+			}
+			if !reflect.DeepEqual(v, UnionValues[int](test.expected)) {
+				t.Fatalf("expected %v, got %v", test.expected, v)
+			}
+		})
+	}
+}
+
+// TestValueOrValuesMarshalJSON checks UnionValues as a struct field: both
+// input forms must decode to the expected value, and marshaling that value
+// must always produce the array form.
+func TestValueOrValuesMarshalJSON(t *testing.T) {
+	type testType struct {
+		Value UnionValues[int] `json:"value"`
+	}
+
+	tests := []struct {
+		name      string
+		value     string
+		expected  testType
+		marshaled string
+	}{
+		{
+			name:  "single value",
+			value: `{"value": 1}`,
+			expected: testType{
+				Value: UnionValues[int]{1},
+			},
+			// A single value is re-encoded as a one-element array.
+			marshaled: `{"value":[1]}`,
+		},
+		{
+			name:  "array of values",
+			value: `{"value": [1, 2, 3]}`,
+			expected: testType{
+				Value: UnionValues[int]{1, 2, 3},
+			},
+			marshaled: `{"value":[1,2,3]}`,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			var v testType
+			if err := json.Unmarshal([]byte(test.value), &v); err != nil {
+				t.Fatalf("failed to unmarshal: %v", err)
+			}
+			if !reflect.DeepEqual(v, test.expected) {
+				t.Fatalf("expected %+v, got %+v", test.expected, v)
+			}
+
+			out, err := json.Marshal(v)
+			if err != nil {
+				t.Fatalf("failed to marshal: %v", err)
+			}
+			if string(out) != test.marshaled {
+				t.Fatalf("expected %s, got %s", test.marshaled, out)
+			}
+		})
+	}
+}
@@ -0,0 +1,48 @@
+// Copyright 2025 SIXT SE
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tensorlake
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+)
+
+// GetParseResult retrieves the result of a parse job.
+//
+// The response will include:
+//  1. parsed content (markdown or pages);
+//  2. structured extraction results (if schemas are provided during the
+//     parse request);
+//  3. page classification results (if page classifications are provided
+//     during the parse request).
+//
+// When the job finishes successfully, the response will contain pages,
+// chunks (text chunks extracted from the document), and structured data
+// (keyed by every schema_name provided in the parse request).
+//
+// parseId must not be empty. It is path-escaped before being placed in the
+// request URL, so characters such as '/', '?' or '#' cannot change which
+// endpoint the request targets.
+func (c *Client) GetParseResult(ctx context.Context, parseId string) (*ParseResult, error) {
+	if parseId == "" {
+		return nil, fmt.Errorf("parse id must not be empty")
+	}
+
+	reqURL := fmt.Sprintf("%s/parse/%s", c.baseURL, url.PathEscape(parseId))
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	return do(c, req, func(r io.Reader) (*ParseResult, error) {
+		var result ParseResult
+		if err := json.NewDecoder(r).Decode(&result); err != nil {
+			return nil, fmt.Errorf("failed to decode response: %w", err)
+		}
+		return &result, nil
+	})
+}
@@ -0,0 +1,116 @@
+// Copyright 2025 SIXT SE
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tensorlake
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log/slog"
+	"net/http"
+)
+
+// ParseDocumentRequest is the request payload for ParseDocument. It is
+// JSON-encoded and sent as the body of the parse request.
+type ParseDocumentRequest struct {
+	// FileSource identifies the document to parse. ParseDocument rejects the
+	// request unless SourceProvided reports true; its error names file_id,
+	// file_url and raw_text as the alternatives, exactly one of which must
+	// be provided.
+	FileSource
+
+	// ParsingOptions defines the configuration for the document parsing
+	// process.
+	//
+	// Tensorlake provides sane defaults that work well for most
+	// documents, so this object is not required. However, every document
+	// is different, and you may want to customize the parsing process to
+	// better suit your needs.
+	ParsingOptions *ParsingOptions `json:"parsing_options,omitempty"`
+
+	// EnrichmentOptions extends the output of the document parsing process
+	// with additional information.
+	//
+	// This includes summarization of tables and figures, which can help to
+	// provide a more comprehensive understanding of the document.
+	//
+	// This object is not required, and the API will use default settings if it
+	// is not present.
+	EnrichmentOptions *EnrichmentOptions `json:"enrichment_options,omitempty"`
+
+	// StructuredExtractionOptions is the options for structured data extraction.
+	//
+	// The properties of this object define the configuration for structured
+	// data extraction.
+	//
+	// If this object is present, the API will perform structured data
+	// extraction on the document.
+	StructuredExtractionOptions []StructuredExtractionOptions `json:"structured_extraction_options,omitempty"`
+
+	// PageClassificationOptions is the options for page classification. It
+	// is sent under the "page_classifications" JSON key.
+	//
+	// The properties of this object define the configuration for page
+	// classification.
+	//
+	// If this object is present, the API will perform page classification
+	// on the document.
+	PageClassificationOptions []PageClassConfig `json:"page_classifications,omitempty"`
+
+	// PageRange is a comma-separated list of page numbers or
+	// ranges to parse (e.g., '1,2,3-5'). Default: all pages.
+	// Examples: "1-5,8,10"
+	PageRange string `json:"page_range,omitempty"`
+
+	// Labels is additional metadata to identify the read request. The
+	// labels are returned in the read response.
+	Labels map[string]string `json:"labels,omitempty"`
+
+	// MimeType is the MIME type of the file. This is used to determine how to process the file.
+	MimeType MimeType `json:"mime_type,omitempty"`
+}
+
+// ParseDocumentResponse represents the response from the ParseDocument
+// operation. It is decoded from the JSON body returned for the parse request.
+type ParseDocumentResponse struct {
+	// ParseId is the unique identifier for the parse job.
+	// This is the ID that can be used to track the status of the parse job.
+	// Used in the GET /documents/v2/parse/{parse_id} endpoint to retrieve
+	// the status and results of the parse job.
+	ParseId string `json:"parse_id"`
+	// CreatedAt is the creation date and time of the parse job, kept as the
+	// string returned by the API.
+	// NOTE(review): the timestamp format is not visible here (presumably
+	// RFC 3339) — confirm against the API before parsing it.
+	CreatedAt string `json:"created_at"`
+}
+
+// ParseDocument submits a document for comprehensive parsing (read, extract, and classify).
+//
+// in must be non-nil and must identify exactly one document source (file_id,
+// file_url, or raw_text). The request is JSON-encoded and sent with a POST
+// to the parse endpoint; on success the decoded response carries the ID of
+// the created parse job.
+//
+// An error is returned when in is nil or has no valid source, when the
+// request cannot be encoded or created, or when the call or the decoding of
+// its response fails.
+func (c *Client) ParseDocument(ctx context.Context, in *ParseDocumentRequest) (*ParseDocumentResponse, error) {
+	if in == nil {
+		return nil, fmt.Errorf("parse document request must not be nil")
+	}
+	if !in.SourceProvided() {
+		return nil, fmt.Errorf("exactly one of file_id, file_url, or raw_text must be provided")
+	}
+
+	// Encoding can fail when a nested option type has a custom marshaler,
+	// so the error is reported instead of sending a broken body.
+	body, err := json.Marshal(in)
+	if err != nil {
+		return nil, fmt.Errorf("failed to encode request: %w", err)
+	}
+
+	// The payload may carry raw document text and caller-supplied labels,
+	// so it is only logged at debug level.
+	slog.DebugContext(ctx, "ParseDocument request", "request", string(body))
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/parse", bytes.NewReader(body))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	return do(c, req, func(r io.Reader) (*ParseDocumentResponse, error) {
+		var result ParseDocumentResponse
+		if err := json.NewDecoder(r).Decode(&result); err != nil {
+			return nil, fmt.Errorf("failed to decode response: %w", err)
+		}
+		return &result, nil
+	})
+}
-Original file line number
+Diff line change
@@ @@ -1,3 +1,5 @@ @@
 module github.com/sixt/tensorlake-go
 go 1.25
++
 +require github.com/google/jsonschema-go v0.4.2