
Commit 9f6305f

change from clickhouse to sqlfluff using clickhouse dialect
1 parent 4267030 commit 9f6305f

File tree

9 files changed
+234 −248 lines changed

.lintrunner.toml

+3 −6
@@ -346,17 +346,14 @@ exclude_patterns = [
 command = [
     'python3',
     'tools/linter/adapters/clickhouse_sql_linter.py',
-    '--binary=.lintbin/clickhouse',
     '@{{PATHSFILE}}',
 ]
 init_command = [
     'python3',
-    'tools/linter/adapters/s3_init.py',
-    '--config-json=tools/linter/adapters/s3_init_config.json',
-    '--linter=clickhouse',
+    'python3',
+    'tools/linter/adapters/pip_init.py',
     '--dry-run={{DRYRUN}}',
-    '--output-dir=.lintbin',
-    '--output-name=clickhouse',
+    'sqlfluff',
 ]
 is_formatter = true
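With this config, `lintrunner init` pip-installs the sqlfluff package through pip_init.py instead of fetching a prebuilt clickhouse binary from S3 into .lintbin. A minimal sketch of the two steps the new config drives, written as plain subprocess calls (the `--dry-run=0` value and the example .sql path are illustrative stand-ins for the {{DRYRUN}} and @{{PATHSFILE}} placeholders that lintrunner fills in):

import subprocess

# init step: install the sqlfluff package (pip_init.py is lintrunner's
# pip-based installer, per the config above)
subprocess.run(
    ["python3", "tools/linter/adapters/pip_init.py", "--dry-run=0", "sqlfluff"],
    check=True,
)

# lint step: the adapter receives paths to lint; a single file is passed
# directly here in place of the @PATHSFILE argument list
subprocess.run(
    [
        "python3",
        "tools/linter/adapters/clickhouse_sql_linter.py",
        "torchci/clickhouse_queries/queued_jobs_aggregate/query.sql",
    ],
    check=True,
)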

tools/linter/adapters/clickhouse_sql_linter.py

+9 −39
@@ -61,20 +61,16 @@ def run_command(
 
 
 def check_file(
-    binary: str,
     filename: str,
 ) -> List[LintMessage]:
-    with open(filename) as f:
-        original = f.read()
-
     try:
         proc = run_command(
             [
-                binary,
-                "--format",
-                "--comments",
-                "--query",
-                original,
+                "sqlfluff",
+                "format",
+                "--dialect",
+                "clickhouse",
+                filename,
             ]
         )
     except OSError as err:
@@ -92,9 +88,7 @@ def check_file(
             )
         ]
 
-    replacement = proc.stdout
-    if original == replacement:
-        return []
+    lint_message = proc.stdout
 
     return [
         LintMessage(
@@ -104,9 +98,9 @@ def check_file(
             code=LINTER_CODE,
             severity=LintSeverity.WARNING,
             name="format",
-            original=original,
-            replacement=replacement.decode("utf-8"),
-            description="See https://clickhouse.com/docs/en/operations/utilities/clickhouse-format.\nRun `lintrunner -a` to apply this patch.",
+            original=None,
+            replacement=None,
+            description=lint_message.decode("utf-8"),
         )
     ]
 
@@ -121,40 +115,16 @@ def main() -> None:
         nargs="+",
         help="paths to lint",
    )
-    parser.add_argument(
-        "--binary",
-        required=True,
-        help="clickhouse binary path",
-    )
 
     args = parser.parse_args()
 
-    if not os.path.exists(args.binary):
-        err_msg = LintMessage(
-            path="<none>",
-            line=None,
-            char=None,
-            code=LINTER_CODE,
-            severity=LintSeverity.ERROR,
-            name="command-failed",
-            original=None,
-            replacement=None,
-            description=(
-                f"Could not find clickhouse binary at {args.binary},"
-                " you may need to run `lintrunner init`."
-            ),
-        )
-        print(json.dumps(err_msg._asdict()), flush=True)
-        exit(0)
-
     with concurrent.futures.ThreadPoolExecutor(
         max_workers=os.cpu_count(),
         thread_name_prefix="Thread",
     ) as executor:
         futures = {
             executor.submit(
                 check_file,
-                args.binary,
                 filename,
             ): filename
             for filename in args.filenames
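Assembled from the hunks above, check_file no longer reads the file and computes a replacement itself; it shells out to sqlfluff and relays whatever sqlfluff prints. A sketch of the resulting function (the except branch and the path/line/char fields are not visible in these hunks and are assumed to match the rest of the file):

def check_file(
    filename: str,
) -> List[LintMessage]:
    try:
        # `sqlfluff format` rewrites the file in place; its stdout becomes
        # the human-readable report surfaced to lintrunner
        proc = run_command(
            ["sqlfluff", "format", "--dialect", "clickhouse", filename]
        )
    except OSError as err:
        return [...]  # command-failed LintMessage, unchanged by this commit

    lint_message = proc.stdout
    return [
        LintMessage(
            path=filename,
            line=None,
            char=None,
            code=LINTER_CODE,
            severity=LintSeverity.WARNING,
            name="format",
            original=None,
            replacement=None,
            description=lint_message.decode("utf-8"),
        )
    ]

The SQL diffs below show what sqlfluff's clickhouse dialect does to the checked-in queries.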
@@ -1,43 +1,47 @@
 --- This query is used by HUD metrics page to get the list of queued jobs
 with possible_queued_jobs as (
-    select id, run_id
-    from default.workflow_job -- FINAL not needed since we just use this to filter a table that has already been FINALed
-    where status = 'queued'
-    AND created_at < (CURRENT_TIMESTAMP() - INTERVAL 5 MINUTE)
-    AND created_at > (CURRENT_TIMESTAMP() - INTERVAL 1 WEEK)
+    select
+        id,
+        run_id
+    from default.workflow_job -- FINAL not needed since we just use this to filter a table that has already been FINALed
+    where
+        status = 'queued'
+        and created_at < (CURRENT_TIMESTAMP() - interval 5 minute)
+        and created_at > (CURRENT_TIMESTAMP() - interval 1 week)
 )
-SELECT
-    DATE_DIFF(
-        'second',
-        job.created_at,
-        CURRENT_TIMESTAMP()
-    ) AS queue_s,
-    CONCAT(workflow.name, ' / ', job.name) AS name,
-    job.html_url,
-    IF(
-        LENGTH(job.labels) = 0,
-        'N/A',
+
+select
+    DATE_DIFF(
+        'second',
+        job.created_at,
+        CURRENT_TIMESTAMP()
+    ) as queue_s,
+    CONCAT(workflow.name, ' / ', job.name) as name,
+    job.html_url,
     IF(
-        LENGTH(job.labels) > 1,
-        job.labels[2],
-        job.labels[1]
-    )
-    ) AS machine_type
-FROM
-    default.workflow_job job final
-    JOIN default.workflow_run workflow final ON workflow.id = job.run_id
-WHERE
-    job.id in (select id from possible_queued_jobs)
-    and workflow.id in (select run_id from possible_queued_jobs)
-    and workflow.repository.'full_name' = 'pytorch/pytorch'
-    AND job.status = 'queued'
-    /* These two conditions are workarounds for GitHub's broken API. Sometimes */
-    /* jobs get stuck in a permanently "queued" state but definitely ran. We can */
-    /* detect this by looking at whether any steps executed (if there were, */
-    /* obviously the job started running), and whether the workflow was marked as */
-    /* complete (somehow more reliable than the job-level API) */
-    AND LENGTH(job.steps) = 0
-    AND workflow.status != 'completed'
-ORDER BY
-    queue_s DESC
+        LENGTH(job.labels) = 0,
+        'N/A',
+        IF(
+            LENGTH(job.labels) > 1,
+            job.labels[2],
+            job.labels[1]
+        )
+    ) as machine_type
+from
+    default.workflow_job job final
+join default.workflow_run workflow final on workflow.id = job.run_id
+where
+    job.id in (select id from possible_queued_jobs)
+    and workflow.id in (select run_id from possible_queued_jobs)
+    and workflow.repository.'full_name' = 'pytorch/pytorch'
+    and job.status = 'queued'
+    /* These two conditions are workarounds for GitHub's broken API. Sometimes */
+    /* jobs get stuck in a permanently "queued" state but definitely ran. We can */
+    /* detect this by looking at whether any steps executed (if there were, */
+    /* obviously the job started running), and whether the workflow was marked as */
+    /* complete (somehow more reliable than the job-level API) */
+    and LENGTH(job.steps) = 0
+    and workflow.status != 'completed'
+order by
+    queue_s desc
 settings allow_experimental_analyzer = 1;
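The rewritten query illustrates sqlfluff's defaults for this dialect: statement keywords (select, from, where, and, order by) are lowercased, function calls such as DATE_DIFF and LENGTH keep their case, each select column sits on its own line, and the nested IF() calls get consistent indentation. The same formatting can be reproduced from Python through sqlfluff's simple API; a small sketch (sqlfluff.fix is part of sqlfluff's public API, but the exact output depends on the sqlfluff version and any .sqlfluff config in the repo):

import sqlfluff

sql = "select id, run_id from default.workflow_job where status = 'queued'"
# fix() returns the rule-corrected SQL as a string for the given dialect
print(sqlfluff.fix(sql, dialect="clickhouse"))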

torchci/clickhouse_queries/queued_jobs_aggregate/query.sql

+57 −53
@@ -6,66 +6,70 @@
 --- additional runners to spin up.
 
 with possible_queued_jobs as (
-    select id, run_id
-    from default.workflow_job
-    where
-        status = 'queued'
-        AND created_at < (
+    select
+        id,
+        run_id
+    from default.workflow_job
+    where
+        status = 'queued'
+        and created_at < (
             -- Only consider jobs that have been queued for a significant period of time
-            CURRENT_TIMESTAMP() - INTERVAL 30 MINUTE
-        )
-        AND created_at > (
+            CURRENT_TIMESTAMP() - interval 30 minute
+        )
+        and created_at > (
             -- Queued jobs are automatically cancelled after this long. Any allegedly pending
             -- jobs older than this are actually bad data
-            CURRENT_TIMESTAMP() - INTERVAL 3 DAY
-        )
+            CURRENT_TIMESTAMP() - interval 3 day
+        )
 ),
-queued_jobs as (
-    SELECT
-        DATE_DIFF(
-            'minute',
-            job.created_at,
-            CURRENT_TIMESTAMP()
-        ) AS queue_m,
-        workflow.repository.owner.login as org,
-        workflow.repository.name as repo,
-        CONCAT(workflow.name, ' / ', job.name) AS name,
-        job.html_url,
-        IF(
-            LENGTH(job.labels) = 0,
-            'N/A',
+
+queued_jobs as (
+    select
+        DATE_DIFF(
+            'minute',
+            job.created_at,
+            CURRENT_TIMESTAMP()
+        ) as queue_m,
+        workflow.repository.owner.login as org,
+        workflow.repository.name as repo,
+        CONCAT(workflow.name, ' / ', job.name) as name,
+        job.html_url,
         IF(
-            LENGTH(job.labels) > 1,
-            job.labels[2],
-            job.labels[1]
-        )
-        ) AS runner_label
-    FROM
-        default.workflow_job job final
-        JOIN default.workflow_run workflow final ON workflow.id = job.run_id
-    WHERE
-        job.id in (select id from possible_queued_jobs)
-        and workflow.id in (select run_id from possible_queued_jobs)
-        and workflow.repository.owner.login in ('pytorch', 'pytorch-labs')
-        AND job.status = 'queued'
-        /* These two conditions are workarounds for GitHub's broken API. Sometimes */
-        /* jobs get stuck in a permanently "queued" state but definitely ran. We can */
-        /* detect this by looking at whether any steps executed (if there were, */
-        /* obviously the job started running), and whether the workflow was marked as */
-        /* complete (somehow more reliable than the job-level API) */
-        AND LENGTH(job.steps) = 0
-        AND workflow.status != 'completed'
-    ORDER BY
-        queue_m DESC
+            LENGTH(job.labels) = 0,
+            'N/A',
+            IF(
+                LENGTH(job.labels) > 1,
+                job.labels[2],
+                job.labels[1]
+            )
+        ) as runner_label
+    from
+        default.workflow_job job final
+    join default.workflow_run workflow final on workflow.id = job.run_id
+    where
+        job.id in (select id from possible_queued_jobs)
+        and workflow.id in (select run_id from possible_queued_jobs)
+        and workflow.repository.owner.login in ('pytorch', 'pytorch-labs')
+        and job.status = 'queued'
+        /* These two conditions are workarounds for GitHub's broken API. Sometimes */
+        /* jobs get stuck in a permanently "queued" state but definitely ran. We can */
+        /* detect this by looking at whether any steps executed (if there were, */
+        /* obviously the job started running), and whether the workflow was marked as */
+        /* complete (somehow more reliable than the job-level API) */
+        and LENGTH(job.steps) = 0
+        and workflow.status != 'completed'
+    order by
+        queue_m desc
 )
+
 select
-    runner_label,
-    org,
-    repo,
-    count(*) as num_queued_jobs,
-    min(queue_m) as min_queue_time_minutes,
-    max(queue_m) as max_queue_time_minutes
+    runner_label,
+    org,
+    repo,
+    COUNT(*) as num_queued_jobs,
+    MIN(queue_m) as min_queue_time_minutes,
+    MAX(queue_m) as max_queue_time_minutes
 from queued_jobs
 group by runner_label, org, repo
 order by max_queue_time_minutes desc
-settings allow_experimental_analyzer = 1;
\ No newline at end of file
+settings allow_experimental_analyzer = 1;
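Note that with original=None and replacement=None, lintrunner itself no longer carries a patch to apply; the rewrite happens on disk when the adapter runs `sqlfluff format`. What lintrunner receives is one JSON message per file, mirroring the json.dumps(..._asdict()) pattern visible in the removed error branch. An illustrative sketch of its shape as a Python dict (the "CLICKHOUSE" code and the severity serialization are assumptions about the unchanged parts of the adapter):

{
    "path": "torchci/clickhouse_queries/queued_jobs_aggregate/query.sql",
    "line": None,
    "char": None,
    "code": "CLICKHOUSE",  # assumed value of LINTER_CODE
    "severity": "warning",
    "name": "format",
    "original": None,
    "replacement": None,
    "description": "...stdout captured from sqlfluff format...",
}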
