Skip to content

Commit 2c399ff

Browse files
committed
Add fetch storage implementation
1 parent e084edd commit 2c399ff

File tree

3 files changed

+163
-1
lines changed

3 files changed

+163
-1
lines changed

icechunk-js/fetch-storage.d.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import type { Storage } from '@earthmover/icechunk'
2+
3+
/**
4+
* Create a read-only storage backend that fetches objects over HTTP.
5+
*
6+
* Works with any publicly accessible icechunk repository hosted on S3-compatible
7+
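* storage. Requires the bucket to support anonymous reads and S3 XML listing (ListObjectsV2). Listing is requested from the URL's origin, so `baseUrl` must be a virtual-hosted-style URL whose host is the bucket endpoint.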
8+
*
9+
* @param baseUrl - Base URL of the icechunk repository (trailing slashes are optional and are stripped)
10+
*
11+
* @example
12+
* ```ts
13+
* import { Repository } from '@earthmover/icechunk'
14+
* import { createFetchStorage } from '@earthmover/icechunk/fetch-storage'
15+
*
16+
* const storage = createFetchStorage('https://my-bucket.s3.us-west-2.amazonaws.com/path/to/repo.icechunk')
17+
* const repo = await Repository.open(storage)
18+
* ```
19+
*/
20+
export declare function createFetchStorage(baseUrl: string): Storage

icechunk-js/fetch-storage.js

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/**
2+
* Read-only fetch-based storage backend for Icechunk.
3+
*
4+
* Usage:
5+
* import { Repository } from '@earthmover/icechunk'
6+
* import { createFetchStorage } from '@earthmover/icechunk/fetch-storage'
7+
*
8+
* const storage = createFetchStorage('https://my-bucket.s3.amazonaws.com/my-repo.icechunk')
9+
* const repo = await Repository.open(storage)
10+
*/
11+
12+
const { Storage } = require('@earthmover/icechunk')
13+
14+
/**
15+
* @param {string} baseUrl - Base URL of the icechunk repository (no trailing slash)
16+
* @returns {Storage}
17+
*/
18+
function createFetchStorage(baseUrl) {
19+
// Normalize: strip trailing slash
20+
const base = baseUrl.replace(/\/+$/, '')
21+
22+
function throwForStatus(resp, url) {
23+
if (resp.ok) return
24+
if (resp.status === 404) {
25+
throw new Error(`ObjectNotFound: ${url}`)
26+
}
27+
throw new Error(`HTTP ${resp.status}: ${url}`)
28+
}
29+
30+
return Storage.newCustom({
31+
canWrite: async (_err) => false,
32+
33+
getObjectRange: async (_err, { path, rangeStart, rangeEnd }) => {
34+
const url = `${base}/${path}`
35+
const headers = {}
36+
37+
if (rangeStart != null && rangeEnd != null) {
38+
headers['Range'] = `bytes=${rangeStart}-${rangeEnd - 1}`
39+
} else if (rangeStart != null) {
40+
headers['Range'] = `bytes=${rangeStart}-`
41+
}
42+
43+
const resp = await fetch(url, { headers })
44+
throwForStatus(resp, url)
45+
46+
const data = new Uint8Array(await resp.arrayBuffer())
47+
const etag = resp.headers.get('etag') ?? undefined
48+
return { data, version: { etag } }
49+
},
50+
51+
putObject: async () => {
52+
throw new Error('Read-only storage: putObject not supported')
53+
},
54+
55+
copyObject: async () => {
56+
throw new Error('Read-only storage: copyObject not supported')
57+
},
58+
59+
listObjects: async (_err, prefix) => {
60+
// Try S3-style XML listing
61+
// Derive bucket URL and key prefix from the base URL
62+
// e.g. https://bucket.s3.region.amazonaws.com/prefix -> bucket URL + key prefix
63+
const url = new URL(base)
64+
const keyPrefix = url.pathname.replace(/^\//, '') + '/' + prefix
65+
const listUrl = `${url.origin}/?list-type=2&prefix=${encodeURIComponent(keyPrefix)}`
66+
67+
const resp = await fetch(listUrl)
68+
throwForStatus(resp, listUrl)
69+
70+
const xml = await resp.text()
71+
const results = []
72+
const contentRegex = /<Contents>([\s\S]*?)<\/Contents>/g
73+
let match
74+
while ((match = contentRegex.exec(xml)) !== null) {
75+
const block = match[1]
76+
const key = block.match(/<Key>(.*?)<\/Key>/)?.[1]
77+
const lastModified = block.match(/<LastModified>(.*?)<\/LastModified>/)?.[1]
78+
const size = block.match(/<Size>(.*?)<\/Size>/)?.[1]
79+
if (key && lastModified && size) {
80+
const id = key.startsWith(keyPrefix) ? key.slice(keyPrefix.length) : key
81+
results.push({
82+
id,
83+
createdAt: new Date(lastModified),
84+
sizeBytes: Number(size),
85+
})
86+
}
87+
}
88+
89+
return results
90+
},
91+
92+
deleteBatch: async () => {
93+
throw new Error('Read-only storage: deleteBatch not supported')
94+
},
95+
96+
getObjectLastModified: async (_err, path) => {
97+
const url = `${base}/${path}`
98+
const resp = await fetch(url, { method: 'HEAD' })
99+
throwForStatus(resp, url)
100+
const lastModified = resp.headers.get('last-modified')
101+
if (!lastModified) {
102+
throw new Error(`No Last-Modified header: ${url}`)
103+
}
104+
return new Date(lastModified)
105+
},
106+
107+
getObjectConditional: async (_err, { path, previousVersion }) => {
108+
const url = `${base}/${path}`
109+
const headers = {}
110+
111+
if (previousVersion?.etag) {
112+
headers['If-None-Match'] = previousVersion.etag
113+
}
114+
115+
const resp = await fetch(url, { headers })
116+
117+
if (resp.status === 304) {
118+
return { kind: 'on_latest_version' }
119+
}
120+
121+
throwForStatus(resp, url)
122+
123+
const data = new Uint8Array(await resp.arrayBuffer())
124+
const etag = resp.headers.get('etag') ?? undefined
125+
return { kind: 'modified', data, newVersion: { etag } }
126+
},
127+
})
128+
}
129+
130+
module.exports = { createFetchStorage }

icechunk-js/package.json

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,22 @@
1919
"science",
2020
"geospatial"
2121
],
22+
"exports": {
23+
".": {
24+
"browser": "./browser.js",
25+
"default": "./index.js"
26+
},
27+
"./fetch-storage": {
28+
"types": "./fetch-storage.d.ts",
29+
"default": "./fetch-storage.js"
30+
}
31+
},
2232
"files": [
2333
"index.d.ts",
2434
"index.js",
25-
"browser.js"
35+
"browser.js",
36+
"fetch-storage.js",
37+
"fetch-storage.d.ts"
2638
],
2739
"napi": {
2840
"binaryName": "icechunk",

0 commit comments

Comments
 (0)