diff --git a/README.md b/README.md index b4c4bb1..70c764e 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,56 @@ make html This generates both HTML files in the `_sites_/` directory. The pages are automatically deployed to GitHub Pages when changes are pushed to the main branch. +## backfill + +The `backfill/` directory contains metadata files that supplement the main crawl. Each file is named +`{owner}_{repo}.json` (e.g., `launchdarkly_java-server-sdk.json`), and the filename determines which +GitHub repo is queried for releases. + +There are two types of backfill files: + +### Old-repo backfill (default) + +Used when an SDK has migrated from a standalone repo to a monorepo. The backfill file references the +**old** repo to capture historical releases that used simple unprefixed tags (e.g., `7.7.0`). The new +monorepo's `.sdk_metadata.json` uses a `tag-prefix` to capture new releases (e.g., +`launchdarkly-java-server-sdk-7.8.0`). + +When the main crawl encounters a repo that has a backfill file, it **skips** that repo — since the +old repo's metadata is no longer authoritative. + +Example (`backfill/launchdarkly_java-server-sdk.json`): +```json +{ + "version": 1, + "sdks": { + "java-server-sdk": {} + } +} +``` + +### Same-repo backfill (`"same-repo": true`) + +Used when a repo changed its tag format (e.g., from `3.3.2` to `launchdarkly-react-client-sdk-v3.4.0`) +without migrating to a different repo. The backfill file references the **same** repo to capture the +old unprefixed releases, while the repo's `.sdk_metadata.json` uses a `tag-prefix` to capture the +new prefixed releases. + +Setting `"same-repo": true` tells the crawl that this repo is still the canonical home for the SDK. +The main crawl will **still process** this repo (rather than skipping it), so that full metadata and +prefixed releases are also ingested. 
+ +Example (`backfill/launchdarkly_java-core.json`): +```json +{ + "same-repo": true, + "version": 1, + "sdks": { + "java-server-sdk": {} + } +} +``` + ## versioning policy The JSON products have an implicit 'v1' version at this time. diff --git a/backfill/launchdarkly_java-core.json b/backfill/launchdarkly_java-core.json new file mode 100644 index 0000000..99a6774 --- /dev/null +++ b/backfill/launchdarkly_java-core.json @@ -0,0 +1,7 @@ +{ + "same-repo": true, + "version": 1, + "sdks": { + "java-server-sdk": {} + } +} diff --git a/backfill/launchdarkly_react-client-sdk.json b/backfill/launchdarkly_react-client-sdk.json new file mode 100644 index 0000000..1b5d473 --- /dev/null +++ b/backfill/launchdarkly_react-client-sdk.json @@ -0,0 +1,7 @@ +{ + "same-repo": true, + "version": 1, + "sdks": { + "react-client-sdk": {} + } +} diff --git a/backfill/launchdarkly_vue-client-sdk.json b/backfill/launchdarkly_vue-client-sdk.json new file mode 100644 index 0000000..2584598 --- /dev/null +++ b/backfill/launchdarkly_vue-client-sdk.json @@ -0,0 +1,7 @@ +{ + "same-repo": true, + "version": 1, + "sdks": { + "vue": {} + } +} diff --git a/metadata/launchdarkly_java-core.json b/metadata/launchdarkly_java-core.json index 0d1eaa2..5828732 100644 --- a/metadata/launchdarkly_java-core.json +++ b/metadata/launchdarkly_java-core.json @@ -9,6 +9,9 @@ "Java" ], "userAgents": ["JavaClient"], + "releases": { + "tag-prefix": "launchdarkly-java-server-sdk-" + }, "features": { "allFlags": { "introduced": "1.0" }, "appMetadata": { "introduced": "5.8" }, diff --git a/metadata/launchdarkly_react-client-sdk.json b/metadata/launchdarkly_react-client-sdk.json index 1945f46..e1f7f79 100644 --- a/metadata/launchdarkly_react-client-sdk.json +++ b/metadata/launchdarkly_react-client-sdk.json @@ -7,7 +7,10 @@ "languages": [ "JavaScript", "TypeScript" ], - "wrapperNames": ["react-client-sdk"] + "wrapperNames": ["react-client-sdk"], + "releases": { + "tag-prefix": "launchdarkly-react-client-sdk-" + } } } } 
diff --git a/metadata/launchdarkly_vue-client-sdk.json b/metadata/launchdarkly_vue-client-sdk.json index 10ec582..df37f72 100644 --- a/metadata/launchdarkly_vue-client-sdk.json +++ b/metadata/launchdarkly_vue-client-sdk.json @@ -7,7 +7,10 @@ "languages": [ "JavaScript", "TypeScript" ], - "wrapperNames": ["vue-client-sdk"] + "wrapperNames": ["vue-client-sdk"], + "releases": { + "tag-prefix": "launchdarkly-vue-client-sdk-" + } } } } diff --git a/scripts/crawl.sh b/scripts/crawl.sh index b97acdb..2e01a2d 100755 --- a/scripts/crawl.sh +++ b/scripts/crawl.sh @@ -22,20 +22,48 @@ rm -rf "$temp_dir" sqlite3 "$temp_db" < ./schemas/sdk_metadata.sql mkdir "$temp_dir" +# Phase 1: Old-repo backfills. These capture releases from repositories that are +# no longer the canonical home for an SDK (e.g. standalone repos that have been +# merged into a monorepo). They run first so their sdk_repos entries have lower +# rowids, which matters because generate-products.sh uses jq reduce (last writer +# wins) to build repos.json. for file in ./backfill/*.json; do + same_repo=$(jq -r '."same-repo" // false' "$file") + if [ "$same_repo" = "true" ]; then + continue + fi repo=$(basename "$file" .json | tr '_' '/') echo "backfilling $repo" + ./tool/ingest -metadata "$file" -db "$temp_db" -repo "$repo" +done +# Phase 2: Same-repo backfills. These capture old unprefixed releases from repos +# that have transitioned to prefixed tags but are still the canonical home for the +# SDK. They run after old-repo backfills so their sdk_repos entries take precedence +# in repos.json. +for file in ./backfill/*.json; do + same_repo=$(jq -r '."same-repo" // false' "$file") + if [ "$same_repo" != "true" ]; then + continue + fi + repo=$(basename "$file" .json | tr '_' '/') + echo "backfilling $repo" ./tool/ingest -metadata "$file" -db "$temp_db" -repo "$repo" done +# Phase 3: Main crawl. Fetch live metadata and prefixed releases from GitHub. 
+# Repos with old-repo backfills are skipped (their metadata lives elsewhere now). +# Repos with same-repo backfills are still crawled for metadata and new releases. ./scripts/repos.sh | while read -r repo; do echo "checking $repo" sanitized_repo=$(echo "$repo" | tr '/' '_') if [ -f "./backfill/$sanitized_repo.json" ]; then - echo "skipping $repo, it was backfilled" - continue + same_repo=$(jq -r '."same-repo" // false' "./backfill/$sanitized_repo.json") + if [ "$same_repo" != "true" ]; then + echo "skipping $repo, it was backfilled" + continue + fi fi metadata=$(gh api "repos/$repo/contents/.sdk_metadata.json" -q '.content') || { diff --git a/tool/cmd/ingest/main.go b/tool/cmd/ingest/main.go index 5705da7..e74a7a1 100644 --- a/tool/cmd/ingest/main.go +++ b/tool/cmd/ingest/main.go @@ -249,7 +249,7 @@ func insertName(tx *sql.Tx, id string, metadata *metadataV1) error { } func insertRepo(tx *sql.Tx, id string, repo string) error { - stmt, err := tx.Prepare("INSERT INTO sdk_repos (id, github) VALUES (?, ?)") + stmt, err := tx.Prepare("INSERT INTO sdk_repos (id, github) VALUES (?, ?) ON CONFLICT DO NOTHING") if err != nil { return err } diff --git a/tool/lib/releases/releases.go b/tool/lib/releases/releases.go index dc0b132..6ccfe7f 100644 --- a/tool/lib/releases/releases.go +++ b/tool/lib/releases/releases.go @@ -16,8 +16,9 @@ const supportWindowYears = 1 // Raw is the raw tag data returned from the github GraphQL releases query. type Raw struct { - Tag string `graphql:"tagName"` - Date string `graphql:"publishedAt"` + Tag string `graphql:"tagName"` + Date string `graphql:"publishedAt"` + IsDraft bool `graphql:"isDraft"` } // Parsed is the post-processed version of a Raw structure, with the version extracted and date @@ -159,6 +160,9 @@ func Filter(releases []Raw, prefix string) ([]Parsed, error) { var processed []Parsed for _, r := range releases { + if r.IsDraft || r.Date == "" { + continue + } if !parser.Relevant(r.Tag) { continue }