Skip to content
This repository was archived by the owner on Feb 13, 2026. It is now read-only.

Commit c7b5ef6

Browse files
authored
Merge pull request #298 from punch-mission/querying-work
Speed up some DB queries
2 parents 68725dd + dfbb6bb commit c7b5ef6

3 files changed

Lines changed: 29 additions & 40 deletions

File tree

punchpipe/control/db.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,11 @@ def directory(self, root: str):
6161
return os.path.join(root, self.level, self.file_type + self.observatory, self.date_obs.strftime("%Y/%m/%d"))
6262

6363

64-
Index("date_obs_index", File.date_obs, mysql_using="btree", mariadb_using="btree")
65-
Index("observatory_index", File.observatory, mysql_using="hash", mariadb_using="hash")
66-
Index("file_type_index", File.file_type, mysql_using="hash", mariadb_using="hash")
67-
Index("processing_flow_index", File.processing_flow, mysql_using="hash", mariadb_using="hash")
68-
Index("ready_files_index", File.file_type, File.level, File.state, File.observatory, File.outlier, File.date_obs)
64+
Index("get_ready_files", File.state, File.level, File.date_obs.desc(), File.file_type, File.observatory)
65+
Index("get_ready_files_alt", File.level, File.date_obs.desc(), File.file_type, File.state)
66+
Index("construct_background", File.level, File.observatory, File.outlier, File.date_obs, File.state, File.file_type)
67+
Index("get_cal_file", File.file_type, File.observatory, File.date_obs, File.state)
68+
Index("CNN", File.file_type, File.observatory, File.level, File.state, File.outlier)
6969

7070

7171
class Flow(Base):
@@ -88,6 +88,10 @@ def __repr__(self):
8888
return f"Flow(id={self.flow_id!r})"
8989

9090

91+
Index("flow_stats", Flow.end_time, Flow.flow_type, Flow.state)
92+
Index("flow_cards", Flow.start_time, Flow.flow_level, Flow.flow_type)
93+
94+
9195
class FileRelationship(Base):
9296
__tablename__ = "relationships"
9397
relationship_id = Column(Integer, primary_key=True)

punchpipe/flows/level1.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -223,15 +223,10 @@ def get_two_closest_stray_light(level0_file, session=None, max_distance: timedel
223223
text("second"), File.date_obs, level0_file.date_obs)))
224224
.filter(File.file_type == model_type)
225225
.filter(File.observatory == level0_file.observatory)
226-
.filter(File.state == "created")
227-
.filter(File.file_version.not_like("v%"))) #filters out "v0a".
226+
.filter(File.state == "created"))
228227
if max_distance:
229228
best_models = best_models.filter(dt < max_distance.total_seconds())
230-
highest_version = best_models.order_by(File.file_version).first()
231-
if highest_version is None:
232-
return None, None
233-
highest_version = highest_version[0].file_version
234-
best_models = best_models.filter(File.file_version == highest_version).order_by(dt.asc()).limit(2).all()
229+
best_models = best_models.order_by(dt.asc()).limit(2).all()
235230
if len(best_models) < 2:
236231
return None, None
237232
# Drop the dt values
@@ -246,17 +241,13 @@ def get_two_best_stray_light(level0_file, session=None):
246241
before_model = (session.query(File)
247242
.filter(File.file_type == model_type)
248243
.filter(File.observatory == level0_file.observatory)
249-
.filter(File.level == '1')
250244
.filter(File.date_obs < level0_file.date_obs)
251-
.filter(File.file_version.not_like("v%")) #filters out "v0a".
252-
.order_by(File.file_version.desc(), File.date_obs.desc()).first())
245+
.order_by(File.date_obs.desc()).first())
253246
after_model = (session.query(File)
254247
.filter(File.file_type == model_type)
255248
.filter(File.observatory == level0_file.observatory)
256-
.filter(File.level == '1')
257249
.filter(File.date_obs > level0_file.date_obs)
258-
.filter(File.file_version.not_like("v%")) #filters out "v0a".
259-
.order_by(File.file_version.desc(), File.date_obs.asc()).first())
250+
.order_by(File.date_obs.asc()).first())
260251
if before_model is None or after_model is None:
261252
# We're waiting for the scheduler to fill in here and tell us what's what
262253
return None, None
@@ -285,6 +276,15 @@ def get_two_best_stray_light(level0_file, session=None):
285276
return None, None
286277

287278

279+
def get_first_last_stray_light(session):
280+
dates = (session.query(func.min(File.date_obs), func.max(File.date_obs))
281+
.where(File.file_type.like('S%')).
282+
where(File.state == 'created')).all()
283+
if len(dates) == 0:
284+
return datetime(1900, 1, 1), datetime(2900, 1, 1)
285+
return dates[0]
286+
287+
288288
def get_quartic_model_paths(level0_files, pipeline_config: dict, session=None):
289289
# Get all models, in reverse-chronological order
290290
models = (session.query(File)
@@ -446,6 +446,7 @@ def level1_early_process_flow(flow_id: int | list[int], pipeline_config_path=Non
446446
@task(cache_policy=NO_CACHE)
447447
def level1_late_query_ready_files(session, pipeline_config: dict, reference_time=None, max_n=9e99):
448448
logger = get_run_logger()
449+
start_date, end_date = get_first_last_stray_light(session)
449450
parent = aliased(File)
450451
child = aliased(File)
451452
child_exists_subquery = (session.query(parent)
@@ -459,6 +460,8 @@ def level1_late_query_ready_files(session, pipeline_config: dict, reference_time
459460
.filter(File.level == "1")
460461
.filter(File.state.in_(["created", "progressed"]))
461462
.filter(~child_exists_subquery)
463+
.filter(File.date_obs >= start_date)
464+
.filter(File.date_obs <= end_date)
462465
.order_by(File.date_obs.desc()).all())
463466

464467
distortion_paths = get_distortion_paths(ready, pipeline_config, session)

punchpipe/flows/tests/test_level1.py

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -128,47 +128,31 @@ def session_fn(session):
128128
file_type="SM",
129129
observatory='2',
130130
state='created',
131-
file_version='0b',
131+
file_version='1',
132132
software_version='none',
133133
date_obs=datetime.now(UTC)-timedelta(hours=12))
134134

135135
stray_light_before1 = File(level="1",
136136
file_type="SM",
137137
observatory='2',
138138
state='created',
139-
file_version='1',
139+
file_version='2',
140140
software_version='none',
141141
date_obs=datetime.now(UTC)-timedelta(hours=16))
142142

143-
stray_light_before0 = File(level="1",
144-
file_type="SM",
145-
observatory='2',
146-
state='created',
147-
file_version='0b',
148-
software_version='none',
149-
date_obs=datetime.now(UTC)-timedelta(hours=12))
150-
151-
stray_light_after2 = File(level="1",
152-
file_type="SM",
153-
observatory='2',
154-
state='created',
155-
file_version='0b',
156-
software_version='none',
157-
date_obs=datetime.now(UTC)+timedelta(hours=12))
158-
159143
stray_light_after1 = File(level="1",
160144
file_type="SM",
161145
observatory='2',
162146
state='created',
163-
file_version='1',
147+
file_version='2',
164148
software_version='none',
165149
date_obs=datetime.now(UTC)+timedelta(hours=16))
166150

167151
stray_light_after0 = File(level="1",
168152
file_type="SM",
169153
observatory='2',
170154
state='created',
171-
file_version='0b',
155+
file_version='1',
172156
software_version='none',
173157
date_obs=datetime.now(UTC)+timedelta(hours=12))
174158

@@ -236,12 +220,10 @@ def session_fn(session):
236220
session.add(distortion0)
237221
session.add(distortion1)
238222
session.add(distortion2)
239-
session.add(stray_light_before0)
240223
session.add(stray_light_before1)
241224
session.add(stray_light_before2)
242225
session.add(stray_light_after0)
243226
session.add(stray_light_after1)
244-
session.add(stray_light_after2)
245227
session.add(mask_file0)
246228
session.add(mask_file1)
247229
session.add(mask_file2)

0 commit comments

Comments
 (0)