|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Generate a ClickHouse migration from an OPS declarative-HCL change. |
| 3 | +
|
| 4 | +Pipeline: run the OPS diff (committed HCL -> working tree, via ops/diff.sh) to |
| 5 | +get the DDL `hclexp` would apply, map each statement to its node-role targeting |
| 6 | +using topology.py, and emit a migration whose `operations` are |
| 7 | +run_sql_with_exceptions(...) calls ready to drop into |
| 8 | +posthog/clickhouse/migrations/. |
| 9 | +
|
| 10 | +The HCL supplies *what* (the DDL); topology.py supplies *where* (node_roles); the |
| 11 | +engine kind in topology.py supplies sharded / is_alter_on_replicated_table. |
| 12 | +
|
| 13 | +Usage (from anywhere; paths resolve relative to this file): |
| 14 | + HCLEXP_BIN=../python-clickhouse-schema/hclexp \ |
| 15 | + python posthog/clickhouse/hcl/ops/codegen/gen_migration.py --name add_demo_column |
| 16 | + # options: --ref <git-ref> (default HEAD), --out <path|-> (default stdout) |
| 17 | +""" |
| 18 | + |
| 19 | +from __future__ import annotations |
| 20 | + |
| 21 | +import argparse |
| 22 | +import os |
| 23 | +import re |
| 24 | +import subprocess |
| 25 | +import sys |
| 26 | + |
| 27 | +HERE = os.path.dirname(os.path.abspath(__file__)) |
| 28 | +REPO_ROOT = os.path.abspath(os.path.join(HERE, "..", "..", "..", "..", "..")) |
| 29 | +DIFF_SH = os.path.join("posthog", "clickhouse", "hcl", "ops", "diff.sh") |
| 30 | + |
| 31 | +sys.path.insert(0, HERE) |
| 32 | +from topology import TOPOLOGY # noqa: E402 |
| 33 | + |
| 34 | +# op keyword -> (kind, is-alter?). Longer keywords first so the regex is greedy. |
| 35 | +_OPS = [ |
| 36 | + ("ALTER TABLE", "ALTER", True), |
| 37 | + ("CREATE TABLE IF NOT EXISTS", "CREATE", False), |
| 38 | + ("CREATE TABLE", "CREATE", False), |
| 39 | + ("CREATE MATERIALIZED VIEW", "CREATE", False), |
| 40 | + ("CREATE OR REPLACE VIEW", "CREATE", False), |
| 41 | + ("CREATE VIEW", "CREATE", False), |
| 42 | + ("DROP TABLE IF EXISTS", "DROP", False), |
| 43 | + ("DROP TABLE", "DROP", False), |
| 44 | + ("RENAME TABLE", "RENAME", False), |
| 45 | +] |
| 46 | +_OP_RE = re.compile( |
| 47 | + r"^\s*(" + "|".join(re.escape(k) for k, _, _ in _OPS) + r")\s+`?(?:posthog\.)?`?([A-Za-z0-9_$]+)`?" |
| 48 | +) |
| 49 | + |
| 50 | + |
| 51 | +def run_diff(ref: str) -> str: |
| 52 | + env = dict(os.environ) |
| 53 | + return subprocess.run( |
| 54 | + ["bash", DIFF_SH, ref], cwd=REPO_ROOT, env=env, capture_output=True, text=True, check=True |
| 55 | + ).stdout |
| 56 | + |
| 57 | + |
| 58 | +def parse_statements(diff_out: str) -> tuple[list[tuple[str, str]], set[str]]: |
| 59 | + """Return (unique [statement, env] pairs in order) and the set of UNSAFE tables. |
| 60 | +
|
| 61 | + Statements are accumulated until a line ends with ';' (hclexp may wrap a |
| 62 | + CREATE across lines). Section headers from diff.sh set the current env. |
| 63 | + """ |
| 64 | + statements: list[tuple[str, str]] = [] |
| 65 | + unsafe: set[str] = set() |
| 66 | + seen: set[str] = set() |
| 67 | + env = "?" |
| 68 | + buf: list[str] = [] |
| 69 | + for line in diff_out.splitlines(): |
| 70 | + if line.startswith("# ") and "committed@" in line: |
| 71 | + env = line[2:].split()[0] |
| 72 | + continue |
| 73 | + if line.startswith("==") or not line.strip(): |
| 74 | + continue |
| 75 | + if line.startswith("-- UNSAFE:"): |
| 76 | + m = re.search(r"posthog\.([A-Za-z0-9_$]+)", line) |
| 77 | + if m: |
| 78 | + unsafe.add(m.group(1)) |
| 79 | + continue |
| 80 | + if line.startswith("--"): # "-- no changes" etc. |
| 81 | + continue |
| 82 | + buf.append(line.rstrip()) |
| 83 | + if line.rstrip().endswith(";"): |
| 84 | + stmt = " ".join(buf).strip() |
| 85 | + buf = [] |
| 86 | + if stmt not in seen: |
| 87 | + seen.add(stmt) |
| 88 | + statements.append((stmt, env)) |
| 89 | + return statements, unsafe |
| 90 | + |
| 91 | + |
| 92 | +def classify(stmt: str) -> tuple[str, str]: |
| 93 | + m = _OP_RE.match(stmt) |
| 94 | + if not m: |
| 95 | + raise SystemExit(f"ERROR: cannot parse op/table from statement:\n {stmt}") |
| 96 | + keyword, table = m.group(1), m.group(2) |
| 97 | + kind = next(k for kw, k, _ in _OPS if kw == keyword) |
| 98 | + return kind, table |
| 99 | + |
| 100 | + |
| 101 | +def emit_operation(stmt: str, kind: str, table: str) -> str: |
| 102 | + if table not in TOPOLOGY: |
| 103 | + raise SystemExit( |
| 104 | + f"ERROR: object {table!r} is not in topology.py. Add it (node_roles, " |
| 105 | + f"replicated, sharded) before generating — node_roles is a deliberate choice." |
| 106 | + ) |
| 107 | + roles, replicated, sharded = TOPOLOGY[table] |
| 108 | + is_alter_repl = kind == "ALTER" and replicated |
| 109 | + roles_src = "[" + ", ".join(f"NodeRole.{r}" for r in roles) + "]" |
| 110 | + sql_lit = stmt[:-1] if stmt.endswith(";") else stmt # the runner appends nothing; keep as-is sans ';' |
| 111 | + return ( |
| 112 | + " run_sql_with_exceptions(\n" |
| 113 | + f" {sql_lit!r},\n" |
| 114 | + f" node_roles={roles_src},\n" |
| 115 | + f" sharded={sharded},\n" |
| 116 | + f" is_alter_on_replicated_table={is_alter_repl},\n" |
| 117 | + " )," |
| 118 | + ) |
| 119 | + |
| 120 | + |
| 121 | +def main() -> None: |
| 122 | + ap = argparse.ArgumentParser() |
| 123 | + ap.add_argument("--name", required=True, help="migration slug, e.g. add_demo_column") |
| 124 | + ap.add_argument("--ref", default="HEAD", help="git ref to diff the working tree against") |
| 125 | + ap.add_argument("--out", default="-", help="output path, or - for stdout") |
| 126 | + args = ap.parse_args() |
| 127 | + |
| 128 | + statements, unsafe = parse_statements(run_diff(args.ref)) |
| 129 | + if not statements: |
| 130 | + raise SystemExit("No DDL generated — the OPS HCL has no changes vs the ref.") |
| 131 | + |
| 132 | + ops, warnings = [], [] |
| 133 | + for stmt, _env in statements: |
| 134 | + kind, table = classify(stmt) |
| 135 | + if table in unsafe: |
| 136 | + warnings.append(f"# UNSAFE (review/recreate by hand): {kind} {table}") |
| 137 | + ops.append(emit_operation(stmt, kind, table)) |
| 138 | + |
| 139 | + body = ( |
| 140 | + '"""AUTO-GENERATED from the OPS declarative HCL by ' |
| 141 | + "posthog/clickhouse/hcl/ops/codegen/gen_migration.py.\n" |
| 142 | + "Review node_roles / sharded / is_alter_on_replicated_table before committing.\n" |
| 143 | + '"""\n' |
| 144 | + "from posthog.clickhouse.client.connection import NodeRole\n" |
| 145 | + "from posthog.clickhouse.client.migration_tools import run_sql_with_exceptions\n\n" |
| 146 | + ) |
| 147 | + if warnings: |
| 148 | + body += "\n".join(warnings) + "\n\n" |
| 149 | + body += "operations = [\n" + "\n".join(ops) + "\n]\n" |
| 150 | + |
| 151 | + if args.out == "-": |
| 152 | + sys.stdout.write(body) |
| 153 | + else: |
| 154 | + with open(args.out, "w") as f: |
| 155 | + f.write(body) |
| 156 | + sys.stderr.write(f"wrote {args.out}\n") |
| 157 | + |
| 158 | + |
| 159 | +if __name__ == "__main__": |
| 160 | + main() |
0 commit comments