|
5 | 5 | import signal |
6 | 6 | from datetime import datetime |
7 | 7 | from pathlib import Path |
| 8 | +from time import perf_counter |
8 | 9 |
|
9 | | -from flask import Flask, g, render_template, request |
| 10 | +from flask import Flask, Response, g, jsonify, render_template, request |
10 | 11 |
|
11 | 12 | from abdiff.core.utils import read_run_json |
12 | 13 | from abdiff.webapp.utils import ( |
13 | | - get_field_sample_records, |
14 | 14 | get_record_a_b_versions, |
15 | 15 | get_record_field_diff_summary, |
16 | 16 | get_record_unified_diff_string, |
17 | 17 | get_run_directory, |
18 | | - get_source_sample_records, |
| 18 | + query_duckdb_for_records_datatable, |
19 | 19 | ) |
20 | 20 |
|
21 | 21 | logger = logging.getLogger(__name__) |
@@ -61,7 +61,7 @@ def job() -> str: |
61 | 61 | with open(run_json_filepath) as f: |
62 | 62 | run_data = json.load(f) |
63 | 63 | runs[run_data["run_timestamp"]] = run_data |
64 | | - dict( |
| 64 | + runs = dict( |
65 | 65 | sorted( |
66 | 66 | runs.items(), |
67 | 67 | key=lambda x: datetime.strptime( # noqa: DTZ007 |
@@ -89,63 +89,49 @@ def run(run_timestamp: str) -> str: |
89 | 89 | except FileNotFoundError: |
90 | 90 | transform_logs = "'logs.txt' not found for transform logs" |
91 | 91 |
|
92 | | - # parse run metrics |
93 | | - metrics = run_data.get( |
94 | | - "metrics", {"warning": "'metrics' section not found in run data"} |
95 | | - ) |
96 | | - |
97 | | - # generate links for field and source samples |
98 | | - field_samples = { |
99 | | - field: f"http://{request.host}/run/{run_timestamp}/sample/field/{field}" |
100 | | - for field in metrics["summary"]["fields_with_diffs"] |
101 | | - } |
102 | | - source_samples = { |
103 | | - source: f"http://{request.host}/run/{run_timestamp}/sample/source/{source}" |
104 | | - for source in metrics["summary"]["sources"] |
105 | | - } |
106 | | - sample_links = { |
107 | | - "field_samples": field_samples, |
108 | | - "source_samples": source_samples, |
109 | | - } |
110 | | - |
111 | 92 | return render_template( |
112 | 93 | "run.html", |
113 | 94 | run_data=run_data, |
114 | 95 | run_json=json.dumps(run_data), |
115 | 96 | transform_logs=transform_logs, |
116 | | - metrics_json=json.dumps(metrics), |
117 | | - sample_links=sample_links, |
| 97 | + metrics_json=json.dumps(run_data["metrics"]), |
| 98 | + sources=sorted(run_data["metrics"]["summary"]["sources"]), |
| 99 | + modified_fields=sorted(run_data["metrics"]["summary"]["fields_with_diffs"]), |
118 | 100 | ) |
119 | 101 |
|
120 | | - @app.route( |
121 | | - "/run/<run_timestamp>/sample/<sample_type>/<sample_value>", methods=["GET"] |
122 | | - ) |
123 | | - def run_sample(run_timestamp: str, sample_type: str, sample_value: str) -> str: |
124 | | - """Route to provide links to record views based on a subset of detected diffs.""" |
| 102 | + @app.route("/run/<run_timestamp>/records/data", methods=["POST"]) |
| 103 | + def records_data(run_timestamp: str) -> Response: |
| 104 | + """Endpoint to provide data for Records table in Run view. |
| 105 | +
|
| 106 | + The JavaScript library DataTables (https://datatables.net/) is used to create the |
| 107 | + Records table in the Run view. This table is configured to make HTTP POST |
| 108 | + requests to an endpoint for filtered, paginated data that supplies the table. This |
| 109 | + endpoint provides that data. |
| 110 | +
|
| 111 | + The POST request payload conforms to the request signature here: |
| 112 | + https://datatables.net/manual/server-side. This endpoint receives the parameters |
| 113 | + from the table (e.g. page, ordering, filtering, etc.), parses the query parameters |
| 114 | + from the request payload, and passes them to a utility function which performs the |
| 115 | + DuckDB query, returning a dataframe of results suitable for the table. |
| 116 | + """ |
| 117 | + start_time = perf_counter() |
125 | 118 | run_directory = get_run_directory(run_timestamp) |
| 119 | + run_data = read_run_json(run_directory) |
126 | 120 |
|
127 | | - # get sample records |
128 | | - if sample_type == "field": |
129 | | - sample_df = get_field_sample_records(run_directory, sample_value) |
130 | | - elif sample_type == "source": |
131 | | - sample_df = get_source_sample_records(run_directory, sample_value) |
132 | | - else: |
133 | | - raise ValueError( # noqa: TRY003 |
134 | | - f"Sample type: '{sample_type}' not recognized" |
135 | | - ) |
136 | | - sample_df["record_link"] = sample_df.timdex_record_id.apply( |
137 | | - lambda timdex_record_id: ( |
138 | | - f"http://{request.host}/run/{run_timestamp}/record/{timdex_record_id}" |
139 | | - ) |
| 121 | + datatables_data = query_duckdb_for_records_datatable( |
| 122 | + run_data["duckdb_filepath"], |
| 123 | + draw=int(request.form.get("draw", "1")), |
| 124 | + start=int(request.form.get("start", "0")), |
| 125 | + length=int(request.form.get("length", "10")), |
| 126 | + search_value=request.form.get("search[value]", ""), |
| 127 | + order_column_index=int(request.form.get("order[0][column]", "0")), |
| 128 | + order_direction=request.form.get("order[0][dir]", "asc"), |
| 129 | + source_filter=request.form.getlist("sourceFilter[]"), |
| 130 | + modified_fields_filter=request.form.getlist("modifiedFieldsFilter[]"), |
140 | 131 | ) |
141 | | - sample_df = sample_df.sort_values(by=["source", "timdex_record_id"]) |
142 | 132 |
|
143 | | - return render_template( |
144 | | - "sample.html", |
145 | | - sample_type=sample_type, |
146 | | - sample_value=sample_value, |
147 | | - sample_df=sample_df, |
148 | | - ) |
| 133 | + logger.info(f"records data elapsed: {perf_counter()-start_time}") |
| 134 | + return jsonify(datatables_data) |
149 | 135 |
|
150 | 136 | @app.route("/run/<run_timestamp>/record/<timdex_record_id>", methods=["GET"]) |
151 | 137 | def record(run_timestamp: str, timdex_record_id: str) -> str: |
|
0 commit comments