Skip to content

Commit dae5ab6

Browse files
rename granularity + add doc
1 parent 06a2f5a commit dae5ab6

File tree

9 files changed

+100
-90
lines changed

9 files changed

+100
-90
lines changed

changelog/6.feature.md

Lines changed: 0 additions & 2 deletions
This file was deleted.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
Changed default citations to be 'model-level' citations rather than 'model-experiment-level' citations.
1+
Changed default citations to be 'model' granularity citations rather than 'model-experiment' granularity citations.
22
In practice, this means that you will get fewer citations and they will be the ones that apply to all submissions for a given model, rather than one citation for each model-experiment combination that is found.
File renamed without changes.

changelog/7.feature.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
- Added support for taking paths to CMIP netCDF files as input
2+
- Added `doi_granularity` option so users can specify whether they want citations at the model or experiment granularity

docs/how-to-guides/get-citations-advanced.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030

3131
from cmipcite.citations import (
3232
AuthorListStyle,
33-
DOILevel,
33+
DOIGranularity,
3434
get_bibtex_citation,
3535
get_citations,
3636
get_text_citation,
@@ -52,7 +52,7 @@
5252
# %%
5353
bibtex_citations = get_citations(
5454
["hdl:21.14100/f2f502c9-9626-31c6-b016-3f7c0534803b"],
55-
doi_level=DOILevel.MODEL,
55+
doi_granularity=DOIGranularity.MODEL,
5656
get_citation=get_bibtex_citation,
5757
)
5858
print(f"{len(bibtex_citations)=}")
@@ -70,7 +70,7 @@
7070
# %%
7171
plaintex_citations = get_citations(
7272
["hdl:21.14100/f2f502c9-9626-31c6-b016-3f7c0534803b"],
73-
doi_level=DOILevel.EXPERIMENT,
73+
doi_granularity=DOIGranularity.EXPERIMENT,
7474
get_citation=partial(get_text_citation, author_list_style=AuthorListStyle.LONG),
7575
)
7676

@@ -84,7 +84,7 @@
8484
# %%
8585
plaintex_citations = get_citations(
8686
["hdl:21.14100/f2f502c9-9626-31c6-b016-3f7c0534803b"],
87-
doi_level=DOILevel.MODEL,
87+
doi_granularity=DOIGranularity.MODEL,
8888
get_citation=partial(get_text_citation, author_list_style=AuthorListStyle.SHORT),
8989
)
9090

@@ -120,7 +120,7 @@ def get_my_citation(doi: str, version: str) -> str:
120120
# %%
121121
custom_citations = get_citations(
122122
["hdl:21.14100/f2f502c9-9626-31c6-b016-3f7c0534803b"],
123-
doi_level=DOILevel.MODEL,
123+
doi_granularity=DOIGranularity.MODEL,
124124
get_citation=get_my_citation,
125125
)
126126

docs/how-to-guides/get-citations-basic.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,17 @@
1515
# %% [markdown] editable=true slideshow={"slide_type": ""}
1616
# # How to get citations? (Basic version)
1717
#
18-
# Here, we show how you can get citations for CMIP data.
19-
# Citation can be retrieved with the help of the Persistent IDentifiers (PIDs).
18+
# Citations can be retrieved with the help of the Persistent IDentifiers (PIDs).
2019
# In the CMIP world, there are two types of PIDs:
21-
# * file PID (also called tracking_id)
22-
# * dataset PID (often referred to as just PID).
23-
#
24-
# A dataset is a collection of files from a single variable sampled at a single
25-
# frequency from a single model running a single experiment.
26-
# All the datasets from a single model or a single experiment (and model) are grouped
27-
# under a DOI.
20+
21+
# * file PID (normally referred to as a tracking ID)
22+
# * dataset PID (normally simply referred to as PID).
23+
24+
# A dataset is a collection of files
25+
# (for CMIP, this collection of files
26+
# is for a single variable sampled at a single frequency and spatial sampling
27+
# from a single model running a single experiment).
28+
# Both PID types can be passed to `ids_or_paths`.
2829

2930

3031
# %% [markdown]
@@ -78,12 +79,20 @@
7879

7980

8081
# %% [markdown]
81-
# You can specify the level of the DOI (model or experiment)
82+
# There are multiple possibilities for the retrieved DOI.
83+
# These vary based on the granularity of the DOI.
84+
# At the moment, as far as we know, there are two granularities:
85+
# * model (capturing all submissions to a given MIP by a given model)
86+
# * DRS: `<mip_era>/<activity_id>/<institution_id>/<source_id>`
87+
# * experiment (capturing all submissions to a given MIP by a given model for a
88+
# given experiment).
89+
# * DRS: `<mip_era>/<activity_id>/<institution_id>/<source_id>/<experiment_id>`
90+
# This is controlled by `doi_granularity`.
8291

8392
# %%
8493
citations = get(
8594
["hdl:21.14100/90f93a05-357c-4ea2-b61f-bf2418700791"],
86-
doi_level="experiment",
95+
doi_granularity="experiment",
8796
)
8897
print(citations[0])
8998

src/cmipcite/citations.py

Lines changed: 63 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -42,32 +42,42 @@ class AuthorListStyle(StrEnum):
4242
"""
4343

4444

45-
# TODO: change to DOIGranularity throughout
46-
class DOILevel(StrEnum):
45+
class DOIGranularity(StrEnum):
4746
"""
48-
DOI level
49-
50-
DOIs can be minted at different levels of granularity
51-
i.e. they can capture different groups of datasets.
52-
For example, DOIs minted at the 'model' level
53-
apply to all submissions from that model for a given MIP.
54-
DOIs minted at the 'experiment' level
55-
apply to all outputs from a given experiment
56-
run by a given model in a given MIP.
47+
DOI granularity
48+
49+
CMIP data can be aggregated at different granularities.
50+
Data citations are designated on data aggregations belonging to a model
51+
contribution to a MIP (or activity_id) and on data belonging to an experiment
52+
contributed by a specific model:
53+
model: <mip_era>/<activity_id>/<institution_id>/<source_id>
54+
experiment: <mip_era>/<activity_id>/<institution_id>/<source_id>/<experiment_id>.
5755
"""
5856

59-
# TODO: update notes.
60-
# We use the 'lowest-level' from the DRS as a short-hand.
61-
# experiment is short for mip-model-experiment.
62-
# model is short for mip-model.
6357
EXPERIMENT = "experiment"
6458
"""
65-
Experiment level DOI.
59+
mip-model-experiment granularity of DOI.
6660
"""
6761

6862
MODEL = "model"
6963
"""
70-
Model level DOI
64+
mip-model granularity of DOI.
65+
"""
66+
67+
68+
class FormatOption(StrEnum):
69+
"""
70+
Citation format options
71+
"""
72+
73+
BIBTEX = "bibtex"
74+
"""
75+
Bibtex format
76+
"""
77+
78+
TEXT = "text"
79+
"""
80+
Plain text file
7181
"""
7282

7383

@@ -183,7 +193,7 @@ def get_tracking_id_from_cmip_netcdf(nc_path: Path) -> str:
183193

184194
def get_doi_and_version( # type: ignore
185195
in_value: str,
186-
doi_level: DOILevel,
196+
doi_granularity: DOIGranularity,
187197
client: RESTHandleClient | None = None,
188198
get_tracking_id_from_path: Callable[[Path], str] = get_tracking_id_from_cmip_netcdf,
189199
multi_dataset_handling: MultiDatasetHandlingStrategy | None = None,
@@ -196,11 +206,14 @@ def get_doi_and_version( # type: ignore
196206
in_value
197207
Input ID or path to a netCDF file
198208
199-
doi_level
200-
TODO: rename and update
201-
Level of DOI to retrieve.
209+
doi_granularity
210+
Granularity of DOI to retrieve.
202211
203-
See [DOILevel][(m).] for details.
212+
We use the 'lowest-level' from the DRS as a short-hand.
213+
"experiment" is short for mip-model-experiment.
214+
"model" is short for mip-model.
215+
216+
See [DOIGranularity][(m).] for details.
204217
205218
client
206219
Client to use for interacting with pyhandle's REST API
@@ -260,7 +273,7 @@ def get_doi_and_version( # type: ignore
260273
doi_raw = client.get_value_from_handle(pid, "IS_PART_OF")
261274
doi = doi_raw.replace("doi:", "")
262275

263-
if doi_level == DOILevel.MODEL:
276+
if doi_granularity == DOIGranularity.MODEL:
264277
# get model doi
265278
r = httpx.get(
266279
f"https://api.datacite.org/dois/{doi}",
@@ -270,12 +283,12 @@ def get_doi_and_version( # type: ignore
270283
"identifier"
271284
]
272285

273-
elif doi_level == DOILevel.EXPERIMENT:
286+
elif doi_granularity == DOIGranularity.EXPERIMENT:
274287
# doi is already in the desired form
275288
pass
276289

277290
else: # pragma: no cover
278-
raise NotImplementedError(doi_level)
291+
raise NotImplementedError(doi_granularity)
279292

280293
version = client.get_value_from_handle(pid, "VERSION_NUMBER")
281294

@@ -285,7 +298,7 @@ def get_doi_and_version( # type: ignore
285298
def get_citations( # type: ignore
286299
ids_or_paths: list[str],
287300
get_citation: Callable[[str, str], str],
288-
doi_level: DOILevel,
301+
doi_granularity: DOIGranularity,
289302
client: RESTHandleClient | None = None,
290303
multi_dataset_handling: MultiDatasetHandlingStrategy | None = None,
291304
) -> list[str]:
@@ -315,11 +328,10 @@ def get_citations( # type: ignore
315328
316329
For example, [get_bibtex_citation][(m).].
317330
318-
doi_level
319-
TODO: rename and update
320-
Level of DOI to retrieve.
331+
doi_granularity
332+
Granularity of DOI to retrieve.
321333
322-
See [DOILevel][(m).] for details.
334+
See [DOIGranularity][(m).] for details.
323335
324336
client
325337
Client to use for interacting with pyhandle's REST API
@@ -350,19 +362,22 @@ def get_citations( # type: ignore
350362
(for CMIP, this collection of files
351363
is for a single variable sampled at a single frequency and spatial sampling
352364
from a single model running a single experiment).
353-
For a given PID, we can retrieve the associated DOI.
365+
Both PID types can be passed to `ids_or_paths`.
366+
367+
For a given PID, we can retrieve an associated DOI.
354368
However, there are multiple possibilities for the retrieved DOI.
355369
These vary based on the granularity of the DOI.
356370
At the moment, as far as we know, there are two granularities:
357-
a) capturing all submissions to a given MIP by a given model
358-
b) capturing all submissions to a given MIP by a given model for a given experiment.
359-
The `doi_granularity` controls which DOI grouping level you get.
371+
* model (capturing all submissions to a given MIP by a given model)
372+
* experiment (capturing all submissions to a given MIP by a given model for a
373+
given experiment).
374+
This is controlled by `doi_granularity`.
360375
361376
Examples
362377
--------
363378
>>> citations = get_citations(
364379
... ["hdl:21.14100/f2f502c9-9626-31c6-b016-3f7c0534803b"],
365-
... doi_level=DOILevel.MODEL,
380+
... doi_granularity=DOIGranularity.MODEL,
366381
... get_citation=get_bibtex_citation,
367382
... )
368383
>>> print(citations[0])
@@ -386,7 +401,7 @@ def get_citations( # type: ignore
386401
v,
387402
client=client,
388403
multi_dataset_handling=multi_dataset_handling,
389-
doi_level=doi_level,
404+
doi_granularity=doi_granularity,
390405
)
391406
for v in ids_or_paths
392407
]
@@ -398,22 +413,6 @@ def get_citations( # type: ignore
398413
return res
399414

400415

401-
class FormatOption(StrEnum):
402-
"""
403-
Citation format options
404-
"""
405-
406-
BIBTEX = "bibtex"
407-
"""
408-
Bibtex format
409-
"""
410-
411-
TEXT = "text"
412-
"""
413-
Plain text file
414-
"""
415-
416-
417416
def translate_get_args_to_get_citations_kwargs(
418417
format: FormatOption,
419418
author_list_style: AuthorListStyle,
@@ -467,7 +466,7 @@ def get( # noqa: PLR0913
467466
in_values: list[str],
468467
format: FormatOption = FormatOption.TEXT,
469468
author_list_style: AuthorListStyle = AuthorListStyle.LONG,
470-
doi_level: DOILevel = DOILevel.MODEL,
469+
doi_granularity: DOIGranularity = DOIGranularity.MODEL,
471470
multi_dataset_handling: MultiDatasetHandlingStrategy | None = None,
472471
handle_server_url: str = "http://hdl.handle.net/",
473472
) -> list[str]:
@@ -489,10 +488,10 @@ def get( # noqa: PLR0913
489488
Whether, if the format is text,
490489
the author list should be long (all names) or short (et al.)
491490
492-
doi_level
493-
Level of DOI to retrieve.
491+
doi_granularity
492+
Granularity of DOI to retrieve.
494493
495-
See [DOILevel][(m).] for details.
494+
See [DOIGranularity][(m).] for details.
496495
497496
multi_dataset_handling
498497
Strategy to use when a given ID or file belongs to multiple datasets
@@ -519,13 +518,16 @@ def get( # noqa: PLR0913
519518
(for CMIP, this collection of files
520519
is for a single variable sampled at a single frequency and spatial sampling
521520
from a single model running a single experiment).
522-
For a given PID, we can retrieve the associated DOI.
521+
Both PID types can be passed to `in_values`.
522+
523+
For a given PID, we can retrieve an associated DOI.
523524
However, there are multiple possibilities for the retrieved DOI.
524525
These vary based on the granularity of the DOI.
525526
At the moment, as far as we know, there are two granularities:
526-
a) capturing all submissions to a given MIP by a given model
527-
b) capturing all submissions to a given MIP by a given model for a given experiment.
528-
The `doi_granularity` controls which DOI grouping level you get.
527+
* model (capturing all submissions to a given MIP by a given model)
528+
* experiment (capturing all submissions to a given MIP by a given model for a
529+
given experiment).
530+
This is controlled by `doi_granularity`.
529531
"""
530532
get_citations_kwargs = translate_get_args_to_get_citations_kwargs(
531533
format=format,
@@ -537,7 +539,7 @@ def get( # noqa: PLR0913
537539
citations = get_citations(
538540
ids_or_paths=in_values,
539541
multi_dataset_handling=multi_dataset_handling,
540-
doi_level=doi_level,
542+
doi_granularity=doi_granularity,
541543
**get_citations_kwargs,
542544
)
543545
except MultipleDatasetMemberError as exc:

src/cmipcite/cli/__init__.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import cmipcite
1313
from cmipcite.citations import (
1414
AuthorListStyle,
15-
DOILevel,
15+
DOIGranularity,
1616
FormatOption,
1717
get_citations,
1818
translate_get_args_to_get_citations_kwargs,
@@ -75,11 +75,10 @@ def get( # noqa: PLR0913
7575
help="Whether the author list should be long (all names) or short (et al.)."
7676
),
7777
] = AuthorListStyle.LONG,
78-
# TODO: rename to doi_granularity here and throughout
79-
doi_level: Annotated[
80-
DOILevel,
78+
doi_granularity: Annotated[
79+
DOIGranularity,
8180
typer.Option(help="Desired granularity of the retrieved DOIs."),
82-
] = DOILevel.MODEL,
81+
] = DOIGranularity.MODEL,
8382
multi_dataset_handling: Annotated[
8483
Optional[MultiDatasetHandlingStrategy],
8584
typer.Option(
@@ -105,7 +104,7 @@ def get( # noqa: PLR0913
105104
try:
106105
citations = get_citations(
107106
ids_or_paths=in_values,
108-
doi_level=doi_level,
107+
doi_granularity=doi_granularity,
109108
multi_dataset_handling=multi_dataset_handling,
110109
**get_citations_kwargs,
111110
)

0 commit comments

Comments
 (0)