CliMA
diff --git a/‎Project.toml‎
Lines changed: 13 additions & 2 deletions b/‎Project.toml‎
Lines changed: 13 additions & 2 deletions
diff --git a/‎docs/make.jl‎
Lines changed: 6 additions & 1 deletion b/‎docs/make.jl‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎docs/src/api.md‎
Lines changed: 14 additions & 0 deletions b/‎docs/src/api.md‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎docs/src/observation_recipe.md‎
Lines changed: 128 additions & 0 deletions b/‎docs/src/observation_recipe.md‎
Lines changed: 128 additions & 0 deletions
@@ -1,7 +1,7 @@
 name = "ClimaCalibrate"
 uuid = "4347a170-ebd6-470c-89d3-5c705c0cacc2"
 authors = ["Climate Modeling Alliance"]
-version = "0.1.1"
+version = "0.1.2"
 
 [deps]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
@@ -16,13 +16,19 @@ YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
 
 [weakdeps]
 CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3"
+ClimaAnalysis = "29b5916a-a76c-4e73-9657-3c8fd22e65e6"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+NaNStatistics = "b946abbf-3ea7-4610-9019-9858bfdeaf2d"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
 [extensions]
 CESExt = "CalibrateEmulateSample"
+ClimaAnalysisExt = ["ClimaAnalysis", "NaNStatistics", "Statistics", "LinearAlgebra"]
 
 [compat]
 Aqua = "0.8"
 CalibrateEmulateSample = "0.5, 0.6, 0.7"
+ClimaAnalysis = "0.5.18"
 ClimaParams = "0.10"
 Conda = "1.7, 1.8, 1.9, 1.10"
 Dates = "1"
@@ -32,6 +38,8 @@ EnsembleKalmanProcesses = "1, 2"
 JLD2 = "0.4, 0.5"
 LinearAlgebra = "1"
 Logging = "1.10, 1.11"
+NaNStatistics = "0.6.8 - 0.6.50, 0.6.53"
+OrderedCollections = "1.3"
 Random = "1"
 SafeTestsets = "0.1"
 Statistics = "1"
@@ -43,12 +51,15 @@ julia = "1.9"
 [extras]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3"
+ClimaAnalysis = "29b5916a-a76c-4e73-9657-3c8fd22e65e6"
 ClimaParams = "5c42b081-d73a-476f-9059-fd94b934656c"
 Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+NaNStatistics = "b946abbf-3ea7-4610-9019-9858bfdeaf2d"
+OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Aqua", "CalibrateEmulateSample", "ClimaParams", "Conda", "LinearAlgebra", "SafeTestsets", "Statistics", "Test"]
+test = ["Aqua", "CalibrateEmulateSample", "ClimaAnalysis", "ClimaParams", "Conda", "LinearAlgebra", "NaNStatistics", "OrderedCollections", "SafeTestsets", "Statistics", "Test"]
@@ -1,6 +1,7 @@
 using Documenter
 using Documenter: doctest
 using ClimaCalibrate
+import ClimaAnalysis # needed to load ClimaAnalysis extension
 using Base.CoreLogging
 using DocumenterCitations
 import Literate
@@ -18,7 +19,10 @@ Literate.markdown(
 
 makedocs(
     plugins = [bib],
-    modules = [ClimaCalibrate],
+    modules = [
+        ClimaCalibrate,
+        Base.get_extension(ClimaCalibrate, :ClimaAnalysisExt),
+    ],
     sitename = "ClimaCalibrate.jl",
     authors = "Clima",
     checkdocs = :exports,
@@ -33,6 +37,7 @@ makedocs(
         "Distributed Calibration Tutorial" => "literate_example.md",
         "Backends" => "backends.md",
         "Observations" => "observations.md",
+        "Observation Recipes" => "observation_recipe.md",
         "Emulate and Sample" => "emulate_sample.md",
         "API" => "api.md",
     ],
 
@@ -67,3 +67,17 @@ ClimaCalibrate.minibatcher_over_samples
 ClimaCalibrate.observation_series_from_samples
 ClimaCalibrate.load_latest_ekp
 ```
+
+## Observation Recipe Interface
+
+```@docs
+ClimaCalibrate.ObservationRecipe.AbstractCovarianceEstimator
+ClimaCalibrate.ObservationRecipe.SeasonalDiagonalCovariance
+ClimaCalibrate.ObservationRecipe.SeasonalDiagonalCovariance()
+ClimaCalibrate.ObservationRecipe.SVDplusDCovariance
+ClimaCalibrate.ObservationRecipe.SVDplusDCovariance(sample_dates)
+ClimaCalibrate.ObservationRecipe.covariance
+ClimaCalibrate.ObservationRecipe.observation
+ClimaCalibrate.ObservationRecipe.seasonally_aligned_yearly_sample_date_ranges
+ClimaCalibrate.ObservationRecipe.change_data_type
+```
@@ -0,0 +1,128 @@
+# ObservationRecipe
+
+!!! warning
+    To enable this extension, use `using ClimaAnalysis` or
+    `import ClimaAnalysis`.
+
+When handling weather and climate data, setting up the observation for
+calibration with `EnsembleKalmanProcesses` (or `EKP` for short) can be tedious
+and error-prone. As such, ClimaCalibrate provides recipes for setting up
+observations consisting of samples, noise covariances, names, and metadata.
+
+## How do I use this to set up an observation for calibration with EKP?
+
+All functions assume that any data preprocessing is done with `ClimaAnalysis`.
+
+### Covariance Estimators
+
+There are currently two covariance estimators, `SeasonalDiagonalCovariance` and
+`SVDplusDCovariance`, which are subtypes of `AbstractCovarianceEstimator`.
+`SeasonalDiagonalCovariance` approximates the observation noise covariance as a
+diagonal of variances across all the seasons for each observation, neglecting
+correlations between points. `SVDplusDCovariance` additionally approximates the
+correlations between points from the (often limited) time series observations.
+
+### Necessary data preprocessing
+
+The `OutputVar`s should represent **time series data of summary statistics**.
+For example, to compute seasonal averages of an `OutputVar`, one can use
+`ClimaAnalysis.average_season_across_time`, which will produce an `OutputVar`
+that can be used with either `SeasonalDiagonalCovariance` or
+`SVDplusDCovariance`.
+
+```julia
+import ClimaAnalysis
+
+obs_var = ClimaAnalysis.OutputVar(
+    "precip.mon.mean.nc",
+    "precip",
+    new_start_date = start_date,
+    shift_by = Dates.firstdayofmonth,
+)
+
+# -- preprocessing for units, times, grid, etc. --
+
+seasonal_averages = ClimaAnalysis.average_season_across_time(obs_var)
+```
+
+### Observation
+
+After preprocessing the `OutputVar`s so that they represent time series data of
+summary statistics, one can set up an `EKP.Observation` as shown below.
+
+```julia
+import ClimaAnalysis
+import EnsembleKalmanProcesses as EKP
+import ClimaCalibrate
+import ClimaCalibrate.ObservationRecipe
+
+# Vars are OutputVars preprocessed to ensure consistent units, times,
+# and grid as the diagnostics produced from the model.
+# In this example, we want to calibrate with seasonal averages, so we use
+# ClimaAnalysis.average_season_across_time
+vars = ClimaAnalysis.average_season_across_time.(vars)
+
+# We want the covariance matrix to be Float32, so we change it here.
+vars = ObservationRecipe.change_data_type.(vars, Float32)
+
+# We choose SVDplusDCovariance. We need to supply the start and end dates of
+# the samples with `sample_date_ranges`. To do this, we can use the function
+# below. In this example, the dates in `vars` are all the same. For debugging,
+# it is helpful to use `ClimaAnalysis.dates(var)`.
+sample_date_ranges =
+    ObservationRecipe.seasonally_aligned_yearly_sample_date_ranges(first(vars))
+covar_estimator = ObservationRecipe.SVDplusDCovariance(
+    sample_date_ranges,
+    model_error_scale = Float32(0.05),
+    regularization = Float32(1e-6),
+)
+
+# Finally, we form the observation
+start_date = sample_date_ranges[1][1]
+end_date = sample_date_ranges[1][2]
+obs = ObservationRecipe.observation(
+    covar_estimator,
+    vars,
+    start_date = start_date,
+    end_date = end_date,
+)
+```
+
+## Frequently asked questions
+
+**Q: I need to compute `g_ensemble` and I do not know how the data of the `OutputVar`s is flattened.**
+
+**A:** When forming the sample, the data in an `OutputVar` is flattened using
+`ClimaAnalysis.flatten`. See
+[`ClimaAnalysis.flatten`](https://clima.github.io/ClimaAnalysis.jl/dev/flat/#Flatten)
+in the ClimaAnalysis documentation for more information. The order of the
+variables in the observation is the same as the order of the `OutputVar`s when
+creating the `EKP.Observation` using `ObservationRecipe.observation`.
+
+**Q: How do I handle `NaN`s in the `OutputVar`s so that there are no `NaN`s in the sample and covariance matrix?**
+
+**A:** `NaN`s should be handled when preprocessing the data. In some cases,
+there will be `NaN`s in the data (e.g. calibrating with data that is valid only
+over land). In these cases, the functions for making observations will
+automatically remove `NaN`s from the data. It is important to ensure that across
+the time slices, the `NaN`s appear in the same coordinates of the non-temporal
+dimensions. For example, if the quantity is defined over the dimensions
+longitude, latitude, and time, then any slice of the data at a particular
+longitude and latitude should either only contain `NaN`s or no `NaN`s at all.
+
+**Q: How is the name of the observation determined?**
+
+**A:** The name of the observation is determined by the short name in the
+attributes of the `OutputVar`. If there are multiple `OutputVar`s, then the name
+is all the short names separated by semicolons. If no short name is found, then
+the name will be `nothing`.
+
+**Q: What are `regularization` and `model_error_scale` when making a covariance matrix?**
+
+**A:** The model error scale and regularization terms are used to inflate the
+diagonal of the observation covariance matrix to reflect estimates of
+measurement error. You can add a fixed percentage inflation of the noise due to
+the model error to the covariance matrix with the `model_error_scale` keyword
+argument. Additionally, to prevent very small variance along the diagonal of the
+covariance matrix, you can add a regularization with the `regularization`
+keyword argument.