Skip to content

Commit db23005

Browse files
committed
feat(cloud): upload sqlite archive objects
1 parent dcf0c9c commit db23005

5 files changed

Lines changed: 350 additions & 11 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
### Changes
66

7-
- Update `crawlkit` to v0.8.0.
7+
- Update `crawlkit` to v0.10.0.
88
- Add read-only Cloudflare remote archive scaffolding with `[remote]` config,
99
`subscribe-cloud`, GitHub-backed `remote login` with OAuth or token-env
1010
bootstrap, `remote status`, `remote archives`, and cloud-mode `status --json`
@@ -14,6 +14,9 @@
1414
- Add `discrawl cloud publish` to export non-DM local SQLite rows into the
1515
Cloudflare remote archive ingest API without changing Git snapshot
1616
publishing.
17+
- Mirror the non-DM local SQLite archive into the Worker-backed R2 object store
18+
during `discrawl cloud publish`, alongside the D1 row ingest used for live
19+
queries.
1720

1821
### Fixes
1922

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ require (
4444
github.com/muesli/cancelreader v0.2.2 // indirect
4545
github.com/muesli/termenv v0.16.0 // indirect
4646
github.com/ncruces/go-strftime v1.0.0 // indirect
47-
github.com/openclaw/crawlkit v0.8.0
47+
github.com/openclaw/crawlkit v0.10.0
4848
github.com/pmezard/go-difflib v1.0.0 // indirect
4949
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
5050
github.com/rivo/uniseg v0.4.7 // indirect

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc
7171
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
7272
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
7373
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
74-
github.com/openclaw/crawlkit v0.8.0 h1:EZEAMy7dI6zVn+AC6lN8GycMY8bV3TF3KnheG55A0uU=
75-
github.com/openclaw/crawlkit v0.8.0/go.mod h1:2XkSx3N8yzyjc+Jyf1Zl9VEQVlElh/dzBMW1cRMQQGw=
74+
github.com/openclaw/crawlkit v0.10.0 h1:FKe5Aus+lgik76KswimTvnlwMdfiuaZV1MGign6z73k=
75+
github.com/openclaw/crawlkit v0.10.0/go.mod h1:2XkSx3N8yzyjc+Jyf1Zl9VEQVlElh/dzBMW1cRMQQGw=
7676
github.com/pelletier/go-toml/v2 v2.3.1 h1:MYEvvGnQjeNkRF1qUuGolNtNExTDwct51yp7olPtrEc=
7777
github.com/pelletier/go-toml/v2 v2.3.1/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
7878
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=

internal/cli/cli_test.go

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package cli
33
import (
44
"bytes"
55
"context"
6+
"database/sql"
67
"encoding/json"
78
"errors"
89
"flag"
@@ -92,6 +93,10 @@ func TestCommandValidationEdges(t *testing.T) {
9293
{"--config", cfgPath, "sync", "--source", "bogus"},
9394
{"--config", cfgPath, "sync", "--since", "not-time"},
9495
{"--config", cfgPath, "sync", "--no-update", "--update", "force"},
96+
{"--config", cfgPath, "cloud", "publish", "--bogus"},
97+
{"--config", cfgPath, "cloud", "publish", "extra"},
98+
{"--config", cfgPath, "cloud", "publish", "--json"},
99+
{"--config", cfgPath, "cloud", "publish", "--remote", "https://remote.example"},
95100
{"--config", cfgPath, "publish", "--remote", ""},
96101
{"--config", cfgPath, "subscribe"},
97102
{"--config", cfgPath, "update", "extra"},
@@ -1300,8 +1305,59 @@ func TestCloudPublishSendsNonDMRows(t *testing.T) {
13001305
tokenEnv := "DISCRAWL_TEST_PUBLISH_TOKEN"
13011306
t.Setenv(tokenEnv, "publish-token")
13021307
seenTables := map[string]crawlremote.IngestRequest{}
1308+
var sawSQLiteUpload bool
13031309
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
13041310
assert.Equal(t, "Bearer publish-token", req.Header.Get("Authorization"))
1311+
if req.Method == http.MethodPut && req.URL.EscapedPath() == "/v1/apps/discrawl/archives/discrawl%2Fopenclaw/sqlite" {
1312+
sawSQLiteUpload = true
1313+
payload, err := io.ReadAll(req.Body)
1314+
if err != nil {
1315+
t.Errorf("read sqlite upload: %v", err)
1316+
http.Error(w, err.Error(), http.StatusBadRequest)
1317+
return
1318+
}
1319+
if !bytes.HasPrefix(payload, []byte("SQLite format 3")) {
1320+
t.Errorf("sqlite upload should contain a sqlite image")
1321+
http.Error(w, "not sqlite", http.StatusBadRequest)
1322+
return
1323+
}
1324+
uploadPath := filepath.Join(dir, "uploaded-cloud.db")
1325+
if err := os.WriteFile(uploadPath, payload, 0o600); err != nil {
1326+
t.Errorf("write sqlite upload: %v", err)
1327+
http.Error(w, err.Error(), http.StatusInternalServerError)
1328+
return
1329+
}
1330+
uploadDB, err := sql.Open("sqlite", uploadPath)
1331+
if err != nil {
1332+
t.Errorf("open sqlite upload: %v", err)
1333+
http.Error(w, err.Error(), http.StatusInternalServerError)
1334+
return
1335+
}
1336+
defer func() { _ = uploadDB.Close() }()
1337+
var dmMessages int
1338+
if err := uploadDB.QueryRowContext(ctx, "select count(*) from messages where guild_id = ?", store.DirectMessageGuildID).Scan(&dmMessages); err != nil {
1339+
t.Errorf("count dm messages: %v", err)
1340+
http.Error(w, err.Error(), http.StatusInternalServerError)
1341+
return
1342+
}
1343+
assert.Zero(t, dmMessages)
1344+
var tableCount int
1345+
if err := uploadDB.QueryRowContext(ctx, "select count(*) from sqlite_master where type = 'table' and name in ('guilds', 'channels', 'members', 'messages')").Scan(&tableCount); err != nil {
1346+
t.Errorf("count cloud tables: %v", err)
1347+
http.Error(w, err.Error(), http.StatusInternalServerError)
1348+
return
1349+
}
1350+
assert.Equal(t, 4, tableCount)
1351+
assert.NotEmpty(t, req.Header.Get("X-Crawl-Content-Sha256"))
1352+
w.Header().Set("Content-Type", "application/json")
1353+
_ = json.NewEncoder(w).Encode(crawlremote.SQLiteUploadResult{
1354+
App: "discrawl",
1355+
Archive: "discrawl/openclaw",
1356+
Complete: true,
1357+
Object: &crawlremote.SQLiteObject{Key: "v1/discrawl/discrawl%2Fopenclaw/sqlite/current.db", Size: int64(len(payload))},
1358+
})
1359+
return
1360+
}
13051361
assert.Equal(t, http.MethodPost, req.Method)
13061362
assert.Equal(t, "/v1/apps/discrawl/archives/discrawl%2Fopenclaw/ingest", req.URL.EscapedPath())
13071363
var body crawlremote.IngestRequest
@@ -1339,11 +1395,101 @@ func TestCloudPublishSendsNonDMRows(t *testing.T) {
13391395
require.Len(t, seenTables["channels"].Rows, 1)
13401396
require.Len(t, seenTables["messages"].Rows, 1)
13411397
require.True(t, seenTables["messages"].Final)
1398+
require.True(t, sawSQLiteUpload)
13421399

13431400
var payload map[string]any
13441401
require.NoError(t, json.Unmarshal(out.Bytes(), &payload))
13451402
require.InDelta(t, float64(1), payload["guilds"], 0)
13461403
require.InDelta(t, float64(1), payload["messages"], 0)
1404+
require.NotNil(t, payload["sqlite_object"])
1405+
}
1406+
1407+
func TestCloudSQLiteExportHelpers(t *testing.T) {
1408+
ctx := context.Background()
1409+
dir := t.TempDir()
1410+
sourcePath := filepath.Join(dir, "source.db")
1411+
s := seedCLIStore(t, sourcePath)
1412+
require.NoError(t, s.UpsertMember(ctx, store.MemberRecord{
1413+
GuildID: "g1",
1414+
UserID: "u1",
1415+
Username: "peter",
1416+
DisplayName: "Peter",
1417+
RoleIDsJSON: `[]`,
1418+
RawJSON: `{"private":"ignored"}`,
1419+
}))
1420+
require.NoError(t, addCLIDMAttachment(ctx, s))
1421+
1422+
require.Empty(t, sqlPlaceholders(0))
1423+
require.Equal(t, "?,?,?", sqlPlaceholders(3))
1424+
1425+
snapshotPath, cleanup, err := sqliteSnapshotPath(ctx, s.DB())
1426+
require.NoError(t, err)
1427+
require.FileExists(t, snapshotPath)
1428+
cleanup()
1429+
require.NoFileExists(t, snapshotPath)
1430+
1431+
exportPath := filepath.Join(dir, "cloud.db")
1432+
require.NoError(t, writeCloudSQLiteExport(ctx, s.DB(), exportPath))
1433+
require.NoError(t, s.Close())
1434+
1435+
sum, err := cloudFileSHA256(exportPath)
1436+
require.NoError(t, err)
1437+
require.Len(t, sum, 64)
1438+
_, err = cloudFileSHA256(filepath.Join(dir, "missing.db"))
1439+
require.Error(t, err)
1440+
1441+
cloudDB, err := sql.Open("sqlite", exportPath)
1442+
require.NoError(t, err)
1443+
defer func() { _ = cloudDB.Close() }()
1444+
1445+
var guilds, channels, members, messages int
1446+
require.NoError(t, cloudDB.QueryRowContext(ctx, "select count(*) from guilds").Scan(&guilds))
1447+
require.NoError(t, cloudDB.QueryRowContext(ctx, "select count(*) from channels").Scan(&channels))
1448+
require.NoError(t, cloudDB.QueryRowContext(ctx, "select count(*) from members").Scan(&members))
1449+
require.NoError(t, cloudDB.QueryRowContext(ctx, "select count(*) from messages").Scan(&messages))
1450+
require.Equal(t, 1, guilds)
1451+
require.Equal(t, 1, channels)
1452+
require.Equal(t, 1, members)
1453+
require.Equal(t, 1, messages)
1454+
1455+
var dmRows int
1456+
require.NoError(t, cloudDB.QueryRowContext(ctx, "select count(*) from messages where guild_id = ?", store.DirectMessageGuildID).Scan(&dmRows))
1457+
require.Zero(t, dmRows)
1458+
1459+
var authorUsername string
1460+
require.NoError(t, cloudDB.QueryRowContext(ctx, "select author_username from messages where message_id = 'm100'").Scan(&authorUsername))
1461+
require.Equal(t, "Peter", authorUsername)
1462+
1463+
var rawJSONColumns int
1464+
require.NoError(t, cloudDB.QueryRowContext(ctx, "select count(*) from pragma_table_info('messages') where name = 'raw_json'").Scan(&rawJSONColumns))
1465+
require.Zero(t, rawJSONColumns)
1466+
}
1467+
1468+
func TestCopyCloudSQLiteRowsErrorsAndBytes(t *testing.T) {
1469+
ctx := context.Background()
1470+
dir := t.TempDir()
1471+
source, err := sql.Open("sqlite", filepath.Join(dir, "source.db"))
1472+
require.NoError(t, err)
1473+
defer func() { _ = source.Close() }()
1474+
out, err := sql.Open("sqlite", filepath.Join(dir, "out.db"))
1475+
require.NoError(t, err)
1476+
defer func() { _ = out.Close() }()
1477+
1478+
_, err = source.ExecContext(ctx, `create table source_rows(value blob)`)
1479+
require.NoError(t, err)
1480+
_, err = source.ExecContext(ctx, `insert into source_rows(value) values(x'68656c6c6f')`)
1481+
require.NoError(t, err)
1482+
_, err = out.ExecContext(ctx, `create table copied(value text)`)
1483+
require.NoError(t, err)
1484+
1485+
require.NoError(t, copyCloudSQLiteRows(ctx, source, out, "copied", []string{"value"}, `select value from source_rows`))
1486+
var value string
1487+
require.NoError(t, out.QueryRowContext(ctx, `select value from copied`).Scan(&value))
1488+
require.Equal(t, "hello", value)
1489+
1490+
err = copyCloudSQLiteRows(ctx, source, out, "copied", []string{"value"}, `select missing from source_rows`)
1491+
require.Error(t, err)
1492+
require.Contains(t, err.Error(), "query sqlite cloud export copied")
13471493
}
13481494

13491495
func TestShareCommandsPublishSubscribeAndUpdate(t *testing.T) {

0 commit comments

Comments
 (0)