@@ -466,6 +466,8 @@ def create_job(
466466 python_version : str | None = None ,
467467 params : dict [str , str ] | None = None ,
468468 parent_job_id : str | None = None ,
469+ rerun_from_job_id : str | None = None ,
470+ run_group_id : str | None = None ,
469471 ) -> str :
470472 """
471473 Creates a new job.
@@ -1835,7 +1837,11 @@ def _jobs_columns() -> "list[SchemaItem]":
18351837 Column ("params" , JSON , nullable = False ),
18361838 Column ("metrics" , JSON , nullable = False ),
18371839 Column ("parent_job_id" , Text , nullable = True ),
1840+ Column ("rerun_from_job_id" , Text , nullable = True ),
1841+ Column ("run_group_id" , Text , nullable = True ),
18381842 Index ("idx_jobs_parent_job_id" , "parent_job_id" ),
1843+ Index ("idx_jobs_rerun_from_job_id" , "rerun_from_job_id" ),
1844+ Index ("idx_jobs_run_group_id" , "run_group_id" ),
18391845 ]
18401846
18411847 @cached_property
@@ -1896,13 +1902,29 @@ def create_job(
18961902 python_version : str | None = None ,
18971903 params : dict [str , str ] | None = None ,
18981904 parent_job_id : str | None = None ,
1905+ rerun_from_job_id : str | None = None ,
1906+ run_group_id : str | None = None ,
18991907 conn : Any = None ,
19001908 ) -> str :
19011909 """
19021910 Creates a new job.
19031911 Returns the job id.
19041912 """
19051913 job_id = str (uuid4 ())
1914+
1915+ # Validate run_group_id and rerun_from_job_id consistency
1916+ if rerun_from_job_id :
1917+ # Rerun job: run_group_id must be provided by caller
1918+ assert run_group_id is not None , (
1919+ "run_group_id must be provided when rerun_from_job_id is set"
1920+ )
1921+ else :
1922+ # First job: run_group_id should not be provided (we set it here)
1923+ assert run_group_id is None , (
1924+ "run_group_id should not be provided when rerun_from_job_id is not set"
1925+ )
1926+ run_group_id = job_id
1927+
19061928 self .db .execute (
19071929 self ._jobs_insert ().values (
19081930 id = job_id ,
@@ -1918,6 +1940,8 @@ def create_job(
19181940 params = json .dumps (params or {}),
19191941 metrics = json .dumps ({}),
19201942 parent_job_id = parent_job_id ,
1943+ rerun_from_job_id = rerun_from_job_id ,
1944+ run_group_id = run_group_id ,
19211945 ),
19221946 conn = conn ,
19231947 )
@@ -2191,35 +2215,47 @@ def link_dataset_version_to_job(
21912215 self .db .execute (update_query , conn = conn )
21922216
21932217 def get_ancestor_job_ids (self , job_id : str , conn = None ) -> list [str ]:
2194- # Use recursive CTE to walk up the parent chain
2195- # Format: WITH RECURSIVE ancestors(id, parent_job_id, depth) AS (...)
2218+ # Use recursive CTE to walk up the rerun chain
2219+ # Format: WITH RECURSIVE ancestors(id, rerun_from_job_id, run_group_id,
2220+ # depth) AS (...)
21962221 # Include depth tracking to prevent infinite recursion in case of
21972222 # circular dependencies
21982223 ancestors_cte = (
21992224 self ._jobs_select (
22002225 self ._jobs .c .id .label ("id" ),
2201- self ._jobs .c .parent_job_id .label ("parent_job_id" ),
2226+ self ._jobs .c .rerun_from_job_id .label ("rerun_from_job_id" ),
2227+ self ._jobs .c .run_group_id .label ("run_group_id" ),
22022228 literal (0 ).label ("depth" ),
22032229 )
22042230 .where (self ._jobs .c .id == job_id )
22052231 .cte (name = "ancestors" , recursive = True )
22062232 )
22072233
22082234 # Recursive part: join with the job each row was rerun from (rerun_from_job_id), incrementing depth and checking limit
2235+ # Also ensure we only traverse jobs within the same run_group_id for safety
22092236 ancestors_recursive = ancestors_cte .union_all (
22102237 self ._jobs_select (
22112238 self ._jobs .c .id .label ("id" ),
2212- self ._jobs .c .parent_job_id .label ("parent_job_id" ),
2239+ self ._jobs .c .rerun_from_job_id .label ("rerun_from_job_id" ),
2240+ self ._jobs .c .run_group_id .label ("run_group_id" ),
22132241 (ancestors_cte .c .depth + 1 ).label ("depth" ),
22142242 ).select_from (
22152243 self ._jobs .join (
22162244 ancestors_cte ,
22172245 (
22182246 self ._jobs .c .id
2219- == cast (ancestors_cte .c .parent_job_id , self ._jobs .c .id .type )
2247+ == cast (ancestors_cte .c .rerun_from_job_id , self ._jobs .c .id .type )
22202248 )
2221- & (ancestors_cte .c .parent_job_id .isnot (None )) # Stop at root jobs
2222- & (ancestors_cte .c .depth < JOB_ANCESTRY_MAX_DEPTH ),
2249+ & (
2250+ ancestors_cte .c .rerun_from_job_id .isnot (None )
2251+ ) # Stop at root jobs
2252+ & (ancestors_cte .c .depth < JOB_ANCESTRY_MAX_DEPTH )
2253+ & (
2254+ self ._jobs .c .run_group_id
2255+ == cast (
2256+ ancestors_cte .c .run_group_id , self ._jobs .c .run_group_id .type
2257+ )
2258+ ), # Safety: only traverse within same run group
22232259 )
22242260 )
22252261 )
0 commit comments