@@ -10,6 +10,7 @@
     INGEST_FLUSH_TIMEOUT_SEC,
     INGEST_QUEUE_MAXSIZE,
     VERBOSE,
+    PRIO_DB,
 )
 import threading
 import time
@@ -21,7 +22,7 @@
     extract_log_excerpt,
 )
 import kcidb_io
-from django.db import transaction
+from django.db import connections, transaction
 from kernelCI_app.models import Issues, Checkouts, Builds, Tests, Incidents

 from kernelCI_app.management.commands.helpers.process_submissions import (
@@ -111,23 +112,95 @@ def prepare_file_data(
     }


-def consume_buffer(buffer: list[TableModels], item_type: TableNames) -> None:
+def _generate_model_insert_query(
+    table_name: TableNames, model: type[TableModels]
+) -> tuple[list[str], str]:
+    """
+    Dynamically generates the insert query for any model.
+
+    Incoming nulls never erase stored values. When the `PRIO_DB` env var is set,
+    existing non-null database values win conflicts; otherwise incoming values do.
+
+    Returns a list of which model properties can be updated and the insert query.
+    """
+    updateable_model_fields: list[str] = []
+    updateable_db_fields: list[str] = []
+    query_params_properties: list[tuple[str, str]] = []
+
+    for field in model._meta.fields:
+        if field.generated:
+            continue
+
+        field_name = (
+            field.name + "_id"
+            if field.get_internal_type() == "ForeignKey"
+            else field.name
+        )
+        real_name = field.db_column or field_name
+        operation = "GREATEST" if real_name == "_timestamp" else "COALESCE"
+
+        query_params_properties.append((real_name, operation))
+        updateable_model_fields.append(field_name)
+        updateable_db_fields.append(real_name)
+
+    conflict_clauses = []
+    for field, op in query_params_properties:
+        if PRIO_DB:
+            conflict_clauses.append(
+                f"""
+                {field} = {op}({table_name}.{field}, EXCLUDED.{field})"""
+            )
+        else:
+            conflict_clauses.append(
+                f"""
+                {field} = {op}(EXCLUDED.{field}, {table_name}.{field})"""
+            )
+
+    query = f"""
+        INSERT INTO {table_name} (
+            {', '.join(updateable_db_fields)}
+        )
+        VALUES (
+            {', '.join(['%s'] * len(updateable_db_fields))}
+        )
+        ON CONFLICT (id)
+        DO UPDATE SET {', '.join(conflict_clauses)};
+    """
+
+    return updateable_model_fields, query
+
+
+def consume_buffer(buffer: list[TableModels], table_name: TableNames) -> None:
     """
     Consume a buffer of items and insert them into the database.
     This function is called by the db_worker thread.
     """
     if not buffer:
         return

-    model = MODEL_MAP[item_type]
+    try:
+        model = MODEL_MAP[table_name]
+    except KeyError:
+        out(f"Unknown table '{table_name}' passed to consume_buffer")
+        raise
+
+    updateable_model_fields, query = _generate_model_insert_query(table_name, model)
+
+    params = []
+    for obj in buffer:
+        obj_values = []
+        for field in updateable_model_fields:
+            value = getattr(obj, field)
+            if isinstance(value, (dict, list)):
+                value = json.dumps(value)
+            obj_values.append(value)
+        params.append(tuple(obj_values))

     t0 = time.time()
-    model.objects.bulk_create(
-        buffer,
-        batch_size=INGEST_BATCH_SIZE,
-        ignore_conflicts=True,
-    )
-    out("bulk_create %s: n=%d in %.3fs" % (item_type, len(buffer), time.time() - t0))
+    with connections["default"].cursor() as cursor:
+        cursor.executemany(query, params)
+
+    out("bulk upsert %s: n=%d in %.3fs" % (table_name, len(buffer), time.time() - t0))


 def flush_buffers(
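To make the upsert policy concrete, here is a sketch of what `_generate_model_insert_query` would return for a hypothetical model with only three columns (`id`, `_timestamp`, `status`) and `PRIO_DB` unset; the real query lists every non-generated field from the model's `_meta.fields`, and whitespace is condensed here for readability:

```python
# Sketch, assuming a hypothetical three-column model mapped to table "tests"
# and PRIO_DB unset; run in this module's context.
fields, query = _generate_model_insert_query("tests", Tests)
# fields == ["id", "_timestamp", "status"]
# query is shaped like:
#
#   INSERT INTO tests (id, _timestamp, status)
#   VALUES (%s, %s, %s)
#   ON CONFLICT (id)
#   DO UPDATE SET
#       id = COALESCE(EXCLUDED.id, tests.id),
#       _timestamp = GREATEST(EXCLUDED._timestamp, tests._timestamp),
#       status = COALESCE(EXCLUDED.status, tests.status);
```

`COALESCE` implements the null-preserving merge described in the docstring, and `GREATEST` keeps the most recent `_timestamp` regardless of which side wins; both skip NULL arguments in PostgreSQL.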
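And a minimal usage sketch of the new `consume_buffer` path. The `Tests` field values below are illustrative only (in production the buffers are filled and flushed by the db_worker thread); the point is that dict/list attributes are JSON-serialized and each row becomes one parameter tuple for `cursor.executemany`:

```python
# Hypothetical rows; field names/values are illustrative, not from the PR.
buffer = [
    Tests(id="maestro:65f0c1...", status="PASS", misc={"runtime": "qemu"}),
    Tests(id="maestro:65f0c2...", status=None),  # NULL never erases a stored status
]
consume_buffer(buffer, "tests")  # upserts via the generated ON CONFLICT query
```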