Skip to content

Commit f540594

Browse files
Starting page number handling for split pdf page (#55)
**Only the `SplitPdfHook.ts`, `SplitPdfHook.test.ts` and `overlay_client.yaml` files were modified by a human. The rest were auto-generated.** To run the integration tests, first run `unstructured-api` on port 8000.
1 parent e81b172 commit f540594

31 files changed

+546
-127
lines changed

.npmignore

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
/*
2-
/docs/**/*.md
1+
**/*
32
!/**/*.ts
43
!/**/*.js
54
!/**/*.map

.speakeasy/gen.lock

+7-6
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
lockVersion: 2.0.0
22
id: f42cb8e6-e2ce-4565-b975-5a9f38b94d5a
33
management:
4-
docChecksum: b3b369c355fbcab2d3248c8dc72afaa8
4+
docChecksum: 666d45deb8d9066b8e19e04a305ca734
55
docVersion: 0.0.1
6-
speakeasyVersion: 1.235.0
7-
generationVersion: 2.298.0
8-
releaseVersion: 0.12.2
9-
configChecksum: 247f78afa5a6eac6ec03b4806f3929bb
6+
speakeasyVersion: 1.272.0
7+
generationVersion: 2.312.1
8+
releaseVersion: 0.13.0
9+
configChecksum: 738cdd9408fdcef300989dc711ba3cd0
1010
repoURL: https://github.com/Unstructured-IO/unstructured-js-client.git
1111
repoSubDirectory: .
1212
installationURL: https://github.com/Unstructured-IO/unstructured-js-client
1313
published: true
1414
features:
1515
typescript:
16-
core: 3.7.0
16+
core: 3.8.0
1717
examples: 2.81.3
1818
globalSecurity: 2.82.9
1919
globalServerURLs: 2.82.4
@@ -28,6 +28,7 @@ generatedFiles:
2828
- .eslintrc.js
2929
- .npmignore
3030
- RUNTIMES.md
31+
- jsr.json
3132
- package.json
3233
- src/index.ts
3334
- src/lib/base64.ts

Makefile

+7-2
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,16 @@ build:
2929
## test-unit: run unit tests
3030
.PHONY: test-unit
3131
test-unit:
32-
npx jest --config jest.config.js test/unit
32+
npx jest --detectOpenHandles --config jest.config.js test/unit
33+
34+
## test-integration: run integration tests
35+
.PHONY: test-integration
36+
test-integration:
37+
npx jest --detectOpenHandles --config jest.config.js test/integration
3338

3439
## test: run all tests
3540
.PHONY: test
36-
test: test-unit
41+
test: test-unit test-integration
3742

3843
#############
3944
# Speakeasy #

README.md

+10-10
Original file line numberDiff line numberDiff line change
@@ -168,15 +168,15 @@ In order to speed up processing of long PDF files, set `splitPdfPage` parameter
168168

169169
Warning: this feature causes the `parent_id` metadata generation in elements to be disabled, as it requires having context of multiple pages.
170170

171-
The amount of parallel requests is controlled by `SplitPdfHook.parallelLimit`. By default it equals to 5. It can't be more than 15, to avoid too high resource usage and costs.
171+
The number of parallel requests is controlled by the `UNSTRUCTURED_CLIENT_SPLIT_CALL_THREADS` environment variable. By default it equals 5. It can't be more than 15, to avoid excessive resource usage and costs.
172172

173173
```typescript
174174
import { SplitPdfHook } from "unstructured-client/hooks/custom/SplitPdfHook";
175175

176176
...
177177

178-
// Modify this parameter to change the limit of parallel request
179-
SplitPdfHook.parallelLimit = 10;
178+
// Modify this environment variable to change the limit of parallel requests
179+
process.env["UNSTRUCTURED_CLIENT_SPLIT_CALL_THREADS"] = "10";
180180

181181
client.general.partition({
182182
files: {
@@ -219,14 +219,14 @@ Certain SDK methods accept files as part of a multi-part request. It is possible
219219
import { openAsBlob } from "node:fs";
220220
import { UnstructuredClient } from "unstructured-client";
221221

222-
async function run() {
223-
const sdk = new UnstructuredClient({
224-
security: {
225-
apiKeyAuth: "YOUR_API_KEY",
226-
},
227-
});
222+
const unstructuredClient = new UnstructuredClient({
223+
security: {
224+
apiKeyAuth: "YOUR_API_KEY",
225+
},
226+
});
228227

229-
const result = await sdk.general.partition({
228+
async function run() {
229+
const result = await unstructuredClient.general.partition({
230230
chunkingStrategy: "by_title",
231231
combineUnderNChars: 500,
232232
encoding: "utf-8",

USAGE.md

+7-7
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
import { openAsBlob } from "node:fs";
44
import { UnstructuredClient } from "unstructured-client";
55

6-
async function run() {
7-
const sdk = new UnstructuredClient({
8-
security: {
9-
apiKeyAuth: "YOUR_API_KEY",
10-
},
11-
});
6+
const unstructuredClient = new UnstructuredClient({
7+
security: {
8+
apiKeyAuth: "YOUR_API_KEY",
9+
},
10+
});
1211

13-
const result = await sdk.general.partition({
12+
async function run() {
13+
const result = await unstructuredClient.general.partition({
1414
chunkingStrategy: "by_title",
1515
combineUnderNChars: 500,
1616
encoding: "utf-8",

docs/sdk/models/shared/partitionparameters.md

+26-25
Large diffs are not rendered by default.

gen.yaml

+3-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ generation:
1010
auth:
1111
oAuth2ClientCredentialsEnabled: false
1212
typescript:
13-
version: 0.12.2
13+
version: 0.13.0
1414
additionalDependencies:
1515
dependencies:
1616
async: ^3.2.5
@@ -21,8 +21,10 @@ typescript:
2121
jest: ^29.7.0
2222
ts-jest: ^29.1.2
2323
peerDependencies: {}
24+
additionalPackageJSON: {}
2425
author: Unstructured
2526
clientServerStatusCodesAsErrors: true
27+
enumFormat: enum
2628
flattenGlobalSecurity: false
2729
imports:
2830
option: openapi

jest.config.js

+1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@
22
module.exports = {
33
preset: "ts-jest",
44
testEnvironment: "node",
5+
silent: true,
56
};

jsr.json

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
2+
3+
{
4+
"name": "unstructured-client",
5+
"version": "0.13.0",
6+
"exports": {
7+
".": "./src/index.ts",
8+
"./sdk/models/errors": "./src/sdk/models/errors/index.ts",
9+
"./sdk/models/shared": "./src/sdk/models/shared/index.ts",
10+
"./sdk/models/operations": "./src/sdk/models/operations/index.ts",
11+
"./lib/config": "./src/lib/config.ts",
12+
"./lib/http": "./src/lib/http.ts",
13+
"./lib/retries": "./src/lib/retries.ts",
14+
"./lib/sdks": "./src/lib/sdks.ts",
15+
"./types": "./src/sdk/types/index.ts"
16+
},
17+
"publish": {
18+
"include": [
19+
"LICENSE",
20+
"README.md",
21+
"RUNTIMES.md",
22+
"USAGE.md",
23+
"src/**/*.ts"
24+
]
25+
}
26+
}

overlay_client.yaml

+8
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,11 @@ actions:
1111
"title": "Split Pdf Page",
1212
"description": "Should the pdf file be split at client. Ignored on backend.",
1313
}
14+
- target: $["components"]["schemas"]["partition_parameters"]["properties"]
15+
update:
16+
"starting_page_number":
17+
{
18+
"type": "integer",
19+
"title": "Starting Page Number",
20+
"description": "The real number of the first PDF page.",
21+
}

package.json

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "unstructured-client",
3-
"version": "0.12.2",
4-
"author": "Unstructured",
3+
"version": "0.13.0",
4+
"author": "Unstructured",
55
"main": "./index.js",
66
"sideEffects": false,
77
"repository": {
@@ -14,7 +14,6 @@
1414
"build": "tsc",
1515
"prepare": "npm run build"
1616
},
17-
"keywords": [],
1817
"peerDependencies": {
1918
"zod": ">= 3"
2019
},

0 commit comments

Comments
 (0)