10 | 10 | import com.fasterxml.jackson.databind.ObjectMapper; |
11 | 11 | import java.net.URL; |
12 | 12 | import java.time.Instant; |
| 13 | +import java.util.Collections; |
| 14 | +import java.util.HashMap; |
13 | 15 | import java.util.List; |
| 16 | +import java.util.Map; |
14 | 17 | import java.util.Optional; |
15 | 18 | import java.util.Set; |
16 | 19 | import java.util.UUID; |
@@ -277,16 +280,90 @@ List<Job> findAll( |
277 | 280 |
278 | 281 | default List<Job> findAllWithRun( |
279 | 282 | String namespaceName, List<RunState> lastRunStates, int limit, int offset) { |
| 283 | + // Fetch the page of jobs first, then hydrate their runs data in a second pass |
| 284 | + List<Job> jobs = findAll(namespaceName, lastRunStates, limit, offset); |
| 285 | + |
| 286 | + if (jobs.isEmpty()) { |
| 287 | + return jobs; |
| 288 | + } |
| 289 | + |
| 290 | + // Populate runs data for every job in the page (see setJobsRunsDataBatch) |
| 291 | + setJobsRunsDataBatch(jobs); |
| 292 | + |
| 293 | + return jobs; |
| 294 | + } |
| 295 | + |
 | 296 | + /** |
 | 297 | + * Sets runs data for a batch of jobs. Issues one optimized runs query per job and resolves |
 | 298 | + * input/output datasets only for each job's latest run, rather than for every run. |
 | 299 | + */ |
| 300 | + default void setJobsRunsDataBatch(List<Job> jobs) { |
| 301 | + if (jobs.isEmpty()) { |
| 302 | + return; |
| 303 | + } |
| 304 | + |
280 | 305 | RunDao runDao = createRunDao(); |
281 | | - return findAll(namespaceName, lastRunStates, limit, offset).stream() |
282 | | - .peek( |
283 | | - j -> { |
284 | | - List<Run> runs = |
285 | | - runDao.findByLatestJob( |
286 | | - j.getNamespace().getValue(), j.getName().getValue(), 10, 0); |
287 | | - this.setJobData(runs, j); |
288 | | - }) |
289 | | - .toList(); |
| 306 | + DatasetVersionDao datasetVersionDao = createDatasetVersionDao(); |
| 307 | + |
 | 308 | + // Fetch the latest runs for every job (one optimized query per job; see getRunsForJobsBatch) |
 | 309 | + Map<String, List<Run>> jobRunsMap = getRunsForJobsBatch(runDao, jobs); |
 | 310 | + |
 | 311 | + // Attach the runs data to each job |
 | 312 | + for (Job job : jobs) { |
 | 313 | +   String jobKey = job.getNamespace().getValue() + ":" + job.getName().getValue(); |
 | 314 | +   List<Run> runs = jobRunsMap.getOrDefault(jobKey, Collections.emptyList()); |
 | 315 | + |
 | 316 | +   if (!runs.isEmpty()) { |
 | 317 | +     Run latestRun = runs.get(0); |
 | 318 | +     job.setLatestRun(latestRun); |
 | 319 | +     job.setLatestRuns(runs.size() > 10 ? runs.subList(0, 10) : runs); |
 | 320 | + |
 | 321 | +     // Resolve input/output datasets for the latest run only |
 | 322 | +     setJobDatasetsBatch(job, latestRun, datasetVersionDao); |
 | 323 | +   } |
 | 324 | + } |
 | 325 | + } |
 | 326 | + |
 | 327 | + /** |
 | 328 | + * Gets the latest runs for each job, keyed by {@code namespace:name}. Uses the optimized |
 | 329 | + * findByLatestJobOptimized query to avoid dataset_facets joins; still one query per job. |
 | 330 | + */ |
 | 331 | + default Map<String, List<Run>> getRunsForJobsBatch(RunDao runDao, List<Job> jobs) { |
 | 332 | + Map<String, List<Run>> result = new HashMap<>(); |
 | 333 | + |
 | 334 | + // One optimized query per job, capped at the 10 most recent runs |
 | 335 | + for (Job job : jobs) { |
 | 336 | +   String jobKey = job.getNamespace().getValue() + ":" + job.getName().getValue(); |
 | 337 | +   List<Run> runs = |
 | 338 | +       runDao.findByLatestJobOptimized( |
 | 339 | +           job.getNamespace().getValue(), job.getName().getValue(), 10, 0); |
 | 340 | +   result.put(jobKey, runs); |
 | 341 | + } |
 | 342 | + |
 | 343 | + return result; |
 | 344 | + } |
 | 345 | + |
 | 346 | + /** Sets a job's input/output dataset ids from its latest run's dataset versions. */ |
 | 347 | + default void setJobDatasetsBatch(Job job, Run latestRun, DatasetVersionDao datasetVersionDao) { |
 | 348 | + // Set input datasets |
 | 349 | + job.setInputs( |
 | 350 | +     datasetVersionDao.findInputDatasetVersionsFor(latestRun.getId().getValue()).stream() |
 | 351 | +         .map( |
 | 352 | +             ds -> |
 | 353 | +                 new DatasetId( |
 | 354 | +                     NamespaceName.of(ds.getNamespaceName()), |
 | 355 | +                     DatasetName.of(ds.getDatasetName()))) |
 | 356 | +         .collect(Collectors.toSet())); |
 | 357 | + |
 | 358 | + // Set output datasets |
 | 359 | + job.setOutputs( |
 | 360 | +     datasetVersionDao.findOutputDatasetVersionsFor(latestRun.getId().getValue()).stream() |
 | 361 | +         .map( |
 | 362 | +             ds -> |
 | 363 | +                 new DatasetId( |
 | 364 | +                     NamespaceName.of(ds.getNamespaceName()), |
 | 365 | +                     DatasetName.of(ds.getDatasetName()))) |
 | 366 | +         .collect(Collectors.toSet())); |
290 | 367 |   } |
291 | 368 |
292 | 369 |   default void setJobDataset(List<JobDataset> datasets, Job j) { |
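
Note: getRunsForJobsBatch still issues one query per job; the win here is that each query uses findByLatestJobOptimized, which skips the expensive dataset_facets joins, not that the loop collapses into one round trip. Below is a minimal sketch of what a true single-query batch could look like, assuming a jDBI SqlObject DAO in the style of this codebase; the method name, view/column names, and the Run row mapping are hypothetical, not part of this change.

import java.util.List;
import org.jdbi.v3.sqlobject.customizer.Bind;
import org.jdbi.v3.sqlobject.customizer.BindList;
import org.jdbi.v3.sqlobject.statement.SqlQuery;

public interface BatchRunDao {
  // ROW_NUMBER() partitions runs by job and keeps only the N most recent per job,
  // so a single query can replace the per-job loop in getRunsForJobsBatch.
  // Table and column names (runs_view, job_name, created_at) are assumptions.
  @SqlQuery(
      "SELECT * FROM ("
          + "  SELECT r.*, ROW_NUMBER() OVER ("
          + "      PARTITION BY r.job_name ORDER BY r.created_at DESC) AS rn"
          + "  FROM runs_view r"
          + "  WHERE r.namespace_name = :namespaceName AND r.job_name IN (<jobNames>)"
          + ") ranked WHERE rn <= :limitPerJob")
  List<Run> findLatestRunsForJobs(
      @Bind("namespaceName") String namespaceName,
      @BindList("jobNames") List<String> jobNames,
      @Bind("limitPerJob") int limitPerJob);
}

The flat result list would then be grouped in Java (e.g. Collectors.groupingBy on the run's job name) to rebuild the same Map<String, List<Run>> that getRunsForJobsBatch returns.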
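Similarly, setJobDatasetsBatch still runs two dataset-version queries per job (inputs and outputs for the latest run), i.e. 2N queries per page. If profiling shows these dominate, they could also be batched across all latest runs in one round trip. A hedged sketch follows, again assuming jDBI; the DAO method and row type are hypothetical, and the table names mirror the schema only loosely.

import java.util.List;
import java.util.UUID;
import org.jdbi.v3.sqlobject.customizer.BindList;
import org.jdbi.v3.sqlobject.statement.SqlQuery;

public interface BatchDatasetVersionDao {
  // Hypothetical row type; mapping it would need a registered ConstructorMapper.
  record RunDatasetRow(UUID runUuid, String namespaceName, String datasetName) {}

  // Resolves input dataset versions for many runs at once; the caller groups
  // rows by runUuid and assigns them back to the matching jobs.
  @SqlQuery(
      "SELECT rim.run_uuid, dv.namespace_name, dv.dataset_name"
          + " FROM runs_input_mapping rim"
          + " JOIN dataset_versions dv ON dv.uuid = rim.dataset_version_uuid"
          + " WHERE rim.run_uuid IN (<runIds>)")
  List<RunDatasetRow> findInputDatasetVersionsForRuns(@BindList("runIds") List<UUID> runIds);
}

With an analogous output-side query, the per-page dataset work drops from 2N queries to two.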