Merge pull request #52 from J-Lentz/tentative-cylc-updates

ceblanton · web-flow · commit c8eed11455b1 · 2025-04-03T17:14:54.000-04:00
Support remainder PP chunks
diff --git a/Jinja2Filters/form_remap_dep.py b/Jinja2Filters/form_remap_dep.py
@@ -56,7 +56,7 @@ def form_remap_dep(grid_type: str,
     # Note: history_segment should be specified for primary chunk generation,
     # and omitted for secondary chunk generation.
     if output_type == "ts":
-        if history_segment == chunk:
+        if str(history_segment) == str(chunk):
             prereq_task = "rename-split-to-pp"
         else:
             prereq_task = "make-timeseries"
diff --git a/Jinja2Filters/iter_chunks.py b/Jinja2Filters/iter_chunks.py
@@ -0,0 +1,60 @@
+from metomi.isodatetime.parsers import DurationParser, TimePointParser
+
+def iter_chunks(chunk_sizes, history_segment, pp_start, pp_stop):
+    """Yield every PP chunk, for each chunk size in turn, as a dictionary with
+    the keys `chunk_size`, `cycle_point`, `segments`, and `is_partial`.
+
+    Arguments:
+        chunk_sizes (list of strings): duration of each chunk size, e.g. "P2Y"
+        history_segment (string): duration of one history segment, e.g. "P1Y"
+        pp_start (string): time point at which the first chunk starts
+        pp_stop (string): last time point at which a chunk/segment may start
+
+    Example:
+    >>> list(iter_chunks(["P2Y", "P3Y"], "P1Y", "0001", "0003"))
+    Result:
+    [
+      {
+        'chunk_size': Duration<P2Y>,
+        'cycle_point': TimePoint<0001-01-01>,
+        'segments': [TimePoint<0001-01-01>, TimePoint<0002-01-01>],
+        'is_partial': False
+      },
+      {
+        'chunk_size': Duration<P2Y>,
+        'cycle_point': TimePoint<0003-01-01>,
+        'segments': [TimePoint<0003-01-01>],
+        'is_partial': True
+      },
+      {
+        'chunk_size': Duration<P3Y>,
+        'cycle_point': TimePoint<0001-01-01>,
+        'segments': [TimePoint<0001-01-01>, TimePoint<0002-01-01>, TimePoint<0003-01-01>],
+        'is_partial': False
+      }
+    ]
+    """
+    duration_parser = DurationParser()
+    time_point_parser = TimePointParser(default_to_unknown_time_zone=True)
+    chunk_sizes = (duration_parser.parse(cs) for cs in chunk_sizes)
+    history_segment = duration_parser.parse(history_segment)
+    pp_start = time_point_parser.parse(pp_start)
+    pp_stop = time_point_parser.parse(pp_stop)
+
+    def n_segments(interval):  # whole history segments in interval (truncated)
+        return int(interval.get_seconds() / history_segment.get_seconds())
+
+    for cs in chunk_sizes:
+        n_segments_full_chunk = n_segments(cs)
+        cycle_point = pp_start
+        while cycle_point <= pp_stop:
+            # pp_stop is inclusive: the segment starting there still counts.
+            n_segments_remaining = n_segments(pp_stop + history_segment - cycle_point)
+            n = min(n_segments_full_chunk, n_segments_remaining)
+            yield {
+                'chunk_size': cs,
+                'cycle_point': cycle_point,
+                'segments': [cycle_point + history_segment*i for i in range(n)],
+                'is_partial': n != n_segments_full_chunk,
+            }
+            cycle_point += cs
diff --git a/app/make-timeavgs/bin/make-timeavgs b/app/make-timeavgs/bin/make-timeavgs
@@ -56,7 +56,7 @@ for dir in ${dirs[@]}; do
 
 	#now need to pick the right date-time format for the cycle-point
 	if [[ $file_freq == P1M ]]; then
-		cyclepoint=$(cylc cycle-point --template CCYY12)
+		cyclepoint=$(cylc cycle-point --template CCYYMM)
         tool_args=month
 	elif [[ $file_freq == P1Y ]]; then
 		cyclepoint=$(cylc cycle-point --template CCYY)
@@ -66,7 +66,7 @@ for dir in ${dirs[@]}; do
 		continue
 	fi
 															   
-	files=($(ls $dir/$interval/$component.*-$cyclepoint.*.nc))
+	files=($(ls $dir/$interval/$component.$cyclepoint-*.*.nc))
 	if [[ ${#files[@]} -lt 1 ]]; then
 		echo "---WARNING--- no files found in directory $dir!"
 		echo "moving on to next directory..."
diff --git a/app/make-timeseries/bin/make-timeseries b/app/make-timeseries/bin/make-timeseries
@@ -126,6 +126,7 @@ echo "    output dir: $outputDir"
 echo "    begin: $begin"
 echo "    input chunk: $inputChunk"
 echo "    output chunk: $outputChunk"
+echo "    pp stop: $pp_stop"
 echo "    component: $component"
 echo "    components allowed to fail: ${fail_ok_components:=}"
 echo "    use subdirs: ${use_subdirs:=}"
@@ -134,7 +135,18 @@ type cdo
 type isodatetime
 
 # Determine how many expected files to concatenate
-expectedChunks=$(( $(isodatetime --as-total=H $outputChunk | sed -e 's/\.0$//') / $(isodatetime --as-total=H $inputChunk | sed -e 's/\.0$//') ))
+inputChunkHrs=$(isodatetime --as-total=H $inputChunk | sed -e 's/\.0$//')
+expectedChunks=$(( $(isodatetime --as-total=H $outputChunk | sed -e 's/\.0$//') / inputChunkHrs ))
+availChunks=$(( $(isodatetime --as-total=H $begin $pp_stop --offset2=$inputChunk | sed -e 's/\.0$//') / inputChunkHrs ))
+
+if ((availChunks >= expectedChunks))
+then
+    end=$(isodatetime $begin --offset $outputChunk)
+else
+    expectedChunks=$availChunks
+    end=$(isodatetime $pp_stop --offset $inputChunk)
+fi
+
 if (( expectedChunks > 0 )); then
     echo NOTE: Expect to concatenate $expectedChunks subchunks
 else
@@ -161,8 +173,6 @@ if [ ! -z "${EPMT_DATA_LINEAGE+x}" ] && [ "$EPMT_DATA_LINEAGE" = "1" ]; then
     echo "Set PYTHONPATH and created i/o lists"
 fi
 
-# Calculate end date
-end=$(isodatetime $begin --offset $outputChunk)
 # remove trailing Z to allow string comparison later
 begin=${begin%Z}
 end=${end%Z}
diff --git a/flow.cylc b/flow.cylc