estuary
diff --git a/‎source-google-play/VERSION‎
Lines changed: 1 addition & 0 deletions b/‎source-google-play/VERSION‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎source-google-play/acmeCo/crashes.schema.yaml‎
Lines changed: 44 additions & 0 deletions b/‎source-google-play/acmeCo/crashes.schema.yaml‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎source-google-play/acmeCo/flow.yaml‎
Lines changed: 20 additions & 0 deletions b/‎source-google-play/acmeCo/flow.yaml‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎source-google-play/acmeCo/installs.schema.yaml‎
Lines changed: 44 additions & 0 deletions b/‎source-google-play/acmeCo/installs.schema.yaml‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎source-google-play/acmeCo/reviews.schema.yaml‎
Lines changed: 44 additions & 0 deletions b/‎source-google-play/acmeCo/reviews.schema.yaml‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎source-google-play/poetry.lock‎
Lines changed: 4253 additions & 0 deletions b/‎source-google-play/poetry.lock‎
Lines changed: 4253 additions & 0 deletions
diff --git a/‎source-google-play/pyproject.toml‎
Lines changed: 20 additions & 0 deletions b/‎source-google-play/pyproject.toml‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎source-google-play/source-google-play.config.yaml‎
Lines changed: 6 additions & 0 deletions b/‎source-google-play/source-google-play.config.yaml‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎source-google-play/source_google_play/TODO.md‎
Lines changed: 23 additions & 0 deletions b/‎source-google-play/source_google_play/TODO.md‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎source-google-play/source_google_play/__init__.py‎
Lines changed: 62 additions & 0 deletions b/‎source-google-play/source_google_play/__init__.py‎
Lines changed: 62 additions & 0 deletions
@@ -0,0 +1 @@
+v1
@@ -0,0 +1,44 @@
+---
+$defs:
+  Meta:
+    properties:
+      op:
+        default: u
+        description: "Operation type (c: Create, u: Update, d: Delete)"
+        enum:
+          - c
+          - u
+          - d
+        title: Op
+        type: string
+      row_id:
+        default: -1
+        description: "Row ID of the Document, counting up from zero, or -1 if not known"
+        title: Row Id
+        type: integer
+    title: Meta
+    type: object
+additionalProperties: true
+properties:
+  _meta:
+    $ref: "#/$defs/Meta"
+    default:
+      op: u
+      row_id: -1
+    description: Document metadata
+  package_name:
+    title: Package Name
+    type: string
+  row_number:
+    title: Row Number
+    type: integer
+  date:
+    title: Date
+    type: string
+required:
+  - package_name
+  - row_number
+  - date
+title: Crashes
+type: object
+x-infer-schema: true
@@ -0,0 +1,20 @@
+---
+collections:
+  acmeCo/crashes:
+    schema: crashes.schema.yaml
+    key:
+      - /date
+      - /package_name
+      - /row_number
+  acmeCo/installs:
+    schema: installs.schema.yaml
+    key:
+      - /date
+      - /package_name
+      - /row_number
+  acmeCo/reviews:
+    schema: reviews.schema.yaml
+    key:
+      - /package_name
+      - /row_number
+      - /year_month
@@ -0,0 +1,44 @@
+---
+$defs:
+  Meta:
+    properties:
+      op:
+        default: u
+        description: "Operation type (c: Create, u: Update, d: Delete)"
+        enum:
+          - c
+          - u
+          - d
+        title: Op
+        type: string
+      row_id:
+        default: -1
+        description: "Row ID of the Document, counting up from zero, or -1 if not known"
+        title: Row Id
+        type: integer
+    title: Meta
+    type: object
+additionalProperties: true
+properties:
+  _meta:
+    $ref: "#/$defs/Meta"
+    default:
+      op: u
+      row_id: -1
+    description: Document metadata
+  package_name:
+    title: Package Name
+    type: string
+  row_number:
+    title: Row Number
+    type: integer
+  date:
+    title: Date
+    type: string
+required:
+  - package_name
+  - row_number
+  - date
+title: Installs
+type: object
+x-infer-schema: true
@@ -0,0 +1,44 @@
+---
+$defs:
+  Meta:
+    properties:
+      op:
+        default: u
+        description: "Operation type (c: Create, u: Update, d: Delete)"
+        enum:
+          - c
+          - u
+          - d
+        title: Op
+        type: string
+      row_id:
+        default: -1
+        description: "Row ID of the Document, counting up from zero, or -1 if not known"
+        title: Row Id
+        type: integer
+    title: Meta
+    type: object
+additionalProperties: true
+properties:
+  _meta:
+    $ref: "#/$defs/Meta"
+    default:
+      op: u
+      row_id: -1
+    description: Document metadata
+  package_name:
+    title: Package Name
+    type: string
+  row_number:
+    title: Row Number
+    type: integer
+  year_month:
+    title: Year Month
+    type: string
+required:
+  - package_name
+  - row_number
+  - year_month
+title: Reviews
+type: object
+x-infer-schema: true
@@ -0,0 +1,20 @@
+[tool.poetry]
+version = "0.1.0"
+name = "source_google_play"
+description = ""
+authors = ["Alex Bair <alexb@estuary.dev>"]
+
+[tool.poetry.dependencies]
+estuary-cdk = {path="../estuary-cdk", develop = true}
+python = "^3.12"
+pydantic = "^2"
+
+[tool.poetry.group.dev.dependencies]
+debugpy = "^1.8.0"
+mypy = "^1.8.0"
+pytest = "^7.4.3"
+pytest-insta = "^0.3.0"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
@@ -0,0 +1,6 @@
+---
+bucket: pubsite_prod_rev_01234567890987654321_my_bucket
+start_date: "2025-07-24T00:00:00Z"
+credentials:
+  credentials_title: Google Service Account
+  service_account: "{\n  \"type\": \"service_account\",\n  \"auth_uri\": \"https://accounts.google.com/o/oauth2/auth\",\n  \"token_uri\": \"https://oauth2.googleapis.com/token\",\n  \"auth_provider_x509_cert_url\": \"https://www.googleapis.com/oauth2/v1/certs\",\n  \"client_x509_cert_url\": \"some_cert_url\",\n  \"universe_domain\": \"googleapis.com\"\n}\n"
@@ -0,0 +1,23 @@
+# Status
+
+This connector is still in development: without valid credentials, we haven't been able to see what the Google Play data actually looks like.
+
+# What's been tested
+- `GCSClient.list_all_files` does list all files in a GCS bucket.
+  - The `prefix` and `globPattern` inputs work to filter what files are returned.
+- The `GoogleServiceAccount` credentials work with a valid service account JSON to make successful requests to GCS.
+- A capture can be created. It won't yield any documents, but we'll be able to finish development once we have an active capture created.
+
+
+# What hasn't been tested / outstanding questions
+- `GCSClient.stream_csv`
+  - What's the dialect for the CSVs that are in the GCS bucket? I suspect the CSVs use UTF-16 encoding, so we will need to pass a custom `CSVConfig` into the `IncrementalCSVProcessor`.
+- The `_add_row_number` and `_extract_year_month` before model validators.
+  - Do these work in general?
+  - Are they inserting the correct values into each record?
+  - Are there other, undocumented fields already in the records that contain the same data these model validators add?
+- Generally, what do the CSVs look like and what fields are always present in each row?
+  - Are the CSVs named like the Google Play docs say they are?
+  - Is there some nice `id` type field we can use as a unique identifier for a row across all CSVs?
+  - For `Statistics`, in the CSV for the current month, are rows for previous days no longer updated? Are we able to improve the incremental strategy for these streams by only yielding rows for the same date as the current log cursor?
+  - For `Reviews`, in the CSV for the current month, is there an always populated field like `review_last_update_date_and_time` that has fine enough grain that we could use it as a cursor field?
@@ -0,0 +1,62 @@
+from logging import Logger
+from typing import Awaitable, Callable
+
+from estuary_cdk.flow import (
+    ConnectorSpec,
+)
+from estuary_cdk.capture import (
+    BaseCaptureConnector,
+    Request,
+    Task,
+    common,
+    request,
+    response,
+)
+
+from .resources import all_resources, validate_credentials
+from .models import (
+    ConnectorState,
+    EndpointConfig,
+    ResourceConfig,
+)
+
+
+class Connector(
+    BaseCaptureConnector[EndpointConfig, ResourceConfig, ConnectorState],
+):
+    def request_class(self):
+        return Request[EndpointConfig, ResourceConfig, ConnectorState]
+
+    async def spec(self, log: Logger, _: request.Spec) -> ConnectorSpec:
+        return ConnectorSpec(
+            configSchema=EndpointConfig.model_json_schema(),
+            oauth2=None,
+            documentationUrl="https://go.estuary.dev/source-google-play",
+            resourceConfigSchema=ResourceConfig.model_json_schema(),
+            resourcePathPointers=ResourceConfig.PATH_POINTERS,
+        )
+
+    async def discover(
+        self, log: Logger, discover: request.Discover[EndpointConfig]
+    ) -> response.Discovered[ResourceConfig]:
+        resources = await all_resources(log, self, discover.config)
+        return common.discovered(resources)
+
+    async def validate(
+        self,
+        log: Logger,
+        validate: request.Validate[EndpointConfig, ResourceConfig],
+    ) -> response.Validated:
+        await validate_credentials(log, self, validate.config)
+        resources = await all_resources(log, self, validate.config)
+        resolved = common.resolve_bindings(validate.bindings, resources)
+        return common.validated(resolved)
+
+    async def open(
+        self,
+        log: Logger,
+        open: request.Open[EndpointConfig, ResourceConfig, ConnectorState],
+    ) -> tuple[response.Opened, Callable[[Task], Awaitable[None]]]:
+        resources = await all_resources(log, self, open.capture.config)
+        resolved = common.resolve_bindings(open.capture.bindings, resources)
+        return common.open(open, resolved)