Changes from all commits
25 commits
a0ea75b
data: add manual_input_usa.csv
finozzifa Oct 8, 2025
658a937
code: create parser file
finozzifa Oct 8, 2025
5d787d0
code: improve manual_input_usa.py
finozzifa Oct 9, 2025
423b730
Merge branch 'prototype-2' of https://github.com/open-energy-transiti…
finozzifa Oct 20, 2025
2db7119
Merge branch 'prototype-2' of https://github.com/open-energy-transiti…
finozzifa Oct 20, 2025
57c13da
code: update manual_input_usa.py
finozzifa Oct 20, 2025
514bc8b
code: update manual_input_usa.py
finozzifa Oct 20, 2025
404cb07
code: update carrier and unit
finozzifa Oct 22, 2025
f43a2d6
code: pre-commit
finozzifa Oct 22, 2025
367c745
code: new update
finozzifa Oct 22, 2025
fa869ca
pre-commit
finozzifa Oct 22, 2025
f35276b
code: modify manual_input_usa.py
finozzifa Oct 23, 2025
2bfb02f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 23, 2025
a5cbf34
code: include pre-commit hooks
finozzifa Oct 23, 2025
f1b876d
code: solve merge conflicts
finozzifa Oct 23, 2025
85d2197
code: re-set parameter dictionary
finozzifa Oct 23, 2025
bd2f6d5
Merge branch 'prototype-2' of https://github.com/open-energy-transiti…
finozzifa Oct 28, 2025
fa24584
Merge branch 'prototype-2' of https://github.com/open-energy-transiti…
finozzifa Nov 3, 2025
b7cb057
code: solve merge conflicts
finozzifa Nov 19, 2025
2bc2903
code: move parse_input_arguments to Commons
finozzifa Nov 20, 2025
37326c0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 20, 2025
843cc85
code: move parse_input_arguments to Commons
finozzifa Nov 20, 2025
5a6bbdd
Merge branch 'issue_59_prototest_manual_input' of https://github.com/…
finozzifa Nov 20, 2025
a8a06d5
code: add unit tests
finozzifa Nov 20, 2025
9a3f7e1
Merge branch 'prototype-2' of https://github.com/open-energy-transiti…
finozzifa Dec 8, 2025
16 changes: 10 additions & 6 deletions REUSE.toml
@@ -8,22 +8,26 @@ path = ["uv.lock", "*.yaml", "docs/**",]
SPDX-FileCopyrightText = "The technology-data authors"
SPDX-License-Identifier = "MIT"


[[annotations]]
path = ["test/test_data/**",]
path = ["test/test_data/**"]
SPDX-FileCopyrightText = "The technology-data authors"
SPDX-License-Identifier = "CC-BY-4.0"

[[annotations]]
path = [
"src/technologydata/package_data/dea_energy_storage/*.json", "src/technologydata/package_data/schemas/*.json"
"src/technologydata/package_data/raw/Technology_datasheet_for_energy_storage.xlsx"
]
SPDX-FileCopyrightText = "The technology-data authors"
SPDX-FileCopyrightText = "The Danish Energy Agency"
SPDX-License-Identifier = "CC-BY-4.0"

[[annotations]]
path = [
"src/technologydata/package_data/raw/Technology_datasheet_for_energy_storage.xlsx"
"src/technologydata/package_data/raw/manual_input_usa.csv"
]
SPDX-FileCopyrightText = "The Danish Energy Agency"
SPDX-FileCopyrightText = "The technology-data authors"
SPDX-License-Identifier = "CC-BY-4.0"

[[annotations]]
path = ["src/technologydata/package_data/*/*.json"]
SPDX-FileCopyrightText = "The technology-data authors"
SPDX-License-Identifier = "CC-BY-4.0"
src/technologydata/package_data/dea_energy_storage/dea_energy_storage.py
@@ -13,13 +13,13 @@
--num_digits <int> Number of significant digits to round the values. Default: 4
--store_source Store the source object on the Wayback Machine. Default: False
--filter_params Filter the parameters stored to technologies.json. Default: False
--export_schema Export the Source/TechnologyCollection schemas. Default: False

Example:
python src/technologydata/package_data/dea_energy_storage/dea_energy_storage.py --num_digits 3 --store_source --filter_params

"""

import argparse
import logging
import pathlib
import re
@@ -36,6 +36,7 @@
Technology,
TechnologyCollection,
)
from technologydata.utils.commons import ArgumentConfig

path_cwd = pathlib.Path.cwd()

@@ -478,60 +479,26 @@ def build_technology_collection(
return TechnologyCollection(technologies=list_techs)


@pydantic.validate_call
def parse_input_arguments() -> argparse.Namespace:
"""
Parse command line arguments.
if __name__ == "__main__":
# Parse input arguments

Returns
-------
argparse.Namespace
Parsed command line arguments containing:
- Number of significant digits
- Store source flag
additional_input_args = [
ArgumentConfig(
name="--filter_params",
action="store_true",
help="filter_params. Filter the parameters stored to technologies.json. Default: false",
),
ArgumentConfig(
name="--export_schema",
action="store_true",
help="export_schema. Export the Source/TechnologyCollection schemas. Default: false",
),
]

"""
# Create the parser
parser = argparse.ArgumentParser(
input_args = Commons.parse_input_arguments(
additional_arguments=additional_input_args,
description="Parse the DEA technology storage dataset",
formatter_class=argparse.RawTextHelpFormatter,
)

# Define arguments
parser.add_argument(
"--num_digits",
type=int,
default=4,
help="Name of significant digits to round the values. ",
)

parser.add_argument(
"--store_source",
action="store_true",
help="store_source, store the source object on the wayback machine. Default: false",
)

parser.add_argument(
"--filter_params",
action="store_true",
help="filter_params. Filter the parameters stored to technologies.json. Default: false",
)

parser.add_argument(
"--export_schema",
action="store_true",
help="export_schema. Export the Source/TechnologyCollection schemas. Default: false",
)

# Parse arguments
args = parser.parse_args()

return args


if __name__ == "__main__":
# Parse input arguments
input_args = parse_input_arguments()
logger.info("Command line arguments parsed.")

# Read the raw data
245 changes: 245 additions & 0 deletions src/technologydata/package_data/manual_input_usa/manual_input_usa.py
@@ -0,0 +1,245 @@
# SPDX-FileCopyrightText: The technology-data authors
#
# SPDX-License-Identifier: MIT

"""
Data parser for the manual_input_usa.csv data set.

How to run:
From the repository root, execute:
python src/technologydata/package_data/manual_input_usa/manual_input_usa.py

Configuration options (command-line arguments):
--num_digits <int> Number of significant digits to round the values. Default: 4
--store_source Store the source object on the Wayback Machine. Default: False

Example:
python src/technologydata/package_data/manual_input_usa/manual_input_usa.py --num_digits 3 --store_source

"""

import logging
import pathlib

import pandas

from technologydata import (
Commons,
Parameter,
Source,
SourceCollection,
Technology,
TechnologyCollection,
)

path_cwd = pathlib.Path.cwd()

logger = logging.getLogger(__name__)


def extract_units_carriers_heating_value(
input_unit: str,
) -> tuple[str, str | None, str | None]:
"""
Extract the standardized unit, carrier, and heating value from an input unit string.

This function maps complex unit representations to simplified unit and carrier
combinations using a predefined dictionary of special patterns.

Parameters
----------
input_unit : str
A specialized unit string to be converted.

Returns
-------
tuple[str, str | None, str | None]
A tuple containing three elements:
- The first element is the standardized unit
- The second element is the corresponding carrier (or None if not found)
- The third element is the corresponding heating value (or None if not found)

"""
# Define conversion dictionary
special_patterns = {
"USD_2022/MW_FT": ("USD_2022/MW", "1/FT", "LHV"),
"MWh_H2/MWh_FT": ("MWh/MWh", "H2/FT", "LHV"),
"MWh_el/MWh_FT": ("MWh/MWh", "el/FT", "LHV"),
"t_CO2/MWh_FT": ("t/MWh", "CO2/FT", "LHV"),
"USD_2022/kWh_H2": ("USD_2022/kWh", "1/H2", "LHV"),
"MWh_el/MWh_H2": ("MWh/MWh", "el/H2", "LHV"),
"USD_2023/t_CO2/h": ("USD_2023/t/h", "1/CO2", "LHV"),
"MWh_el/t_CO2": ("MWh/t", "el/CO2", "LHV"),
"MWh_th/t_CO2": ("MWh/t", "thermal/CO2", "LHV"),
}

if isinstance(input_unit, str) and input_unit in special_patterns:
return special_patterns[input_unit]
else:
return input_unit, None, None
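# Examples (illustrative): a recognised pattern is split into unit, carrier and
# heating value, while any other unit string passes through unchanged, e.g.
#   extract_units_carriers_heating_value("MWh_el/MWh_H2") -> ("MWh/MWh", "el/H2", "LHV")
#   extract_units_carriers_heating_value("USD_2022/kW")   -> ("USD_2022/kW", None, None)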


def build_technology_collection(
dataframe: pandas.DataFrame,
sources_path: pathlib.Path,
store_source: bool = False,
) -> TechnologyCollection:
"""
Compute a collection of technologies from a grouped DataFrame.

Processes input DataFrame by grouping technologies and extracting their parameters,
creating Technology instances for each unique group.

Parameters
----------
dataframe : pandas.DataFrame
Input DataFrame containing technology parameters.
Expected columns include:
- 'scenario': Estimation or case identifier
- 'year': Year of the technology
- 'technology': Detailed technology name
- 'parameter': Parameter name
- 'value': Parameter value
- 'unit': Parameter units
- 'further_description': Extra information about the technology
- 'financial_case': Technology financial case
sources_path : pathlib.Path
Output path for storing the SourceCollection object.
store_source : bool, optional
Flag to decide whether to store the source object on the Wayback Machine. Default is False.

Returns
-------
TechnologyCollection
A collection of Technology instances, each representing a unique
technology group with its associated parameters.

Notes
-----
- The function groups the DataFrame by ["scenario", "year", "technology"]
- For each group, it creates a dictionary of Parameters
- Each Technology is instantiated with group-specific attributes

"""
list_techs = []

if store_source:
source = Source(
title="Energy system technology data for the US",
authors="Contributors to technology-data. Data source: manual_input_usa.csv",
url="https://github.com/PyPSA/technology-data/blob/master/inputs/US/manual_input_usa.csv",
)
source.ensure_in_wayback()
sources = SourceCollection(sources=[source])
sources.to_json(sources_path)
else:
sources = SourceCollection.from_json(sources_path)

for (scenario, year, technology), group in dataframe.groupby(
["scenario", "year", "technology"]
):
parameters = {}
for _, row in group.iterrows():
unit, carrier, heating_value = extract_units_carriers_heating_value(
row["unit"]
)
param_kwargs = {
"magnitude": row["value"],
"sources": sources,
}
if carrier is not None:
param_kwargs["carrier"] = carrier
if heating_value is not None:
param_kwargs["heating_value"] = heating_value
if unit is not None:
param_kwargs["units"] = unit
if row["further_description"] is not None and isinstance(
row["further_description"], str
):
param_kwargs["note"] = row["further_description"]
if row["financial_case"] is not None and isinstance(
row["financial_case"], str
):
param_kwargs["provenance"] = str(row["financial_case"])
parameters[row["parameter"]] = Parameter(**param_kwargs)
list_techs.append(
Technology(
name=technology,
region="US",
year=year,
parameters=parameters,
case=str(scenario),
detailed_technology=technology,
)
)

return TechnologyCollection(technologies=list_techs)


if __name__ == "__main__":
# Parse input arguments
input_args = Commons.parse_input_arguments(
description="Parse the technology_data manual_input_usa.csv dataset"
)
logger.info("Command line arguments parsed.")

manual_input_usa_input_path = pathlib.Path(
path_cwd,
"src",
"technologydata",
"package_data",
"raw",
"manual_input_usa.csv",
)

manual_input_usa_df = pandas.read_csv(
manual_input_usa_input_path, dtype=str, na_values="None"
)
manual_input_usa_df["value"] = manual_input_usa_df["value"].astype(float)
manual_input_usa_df["scenario"] = manual_input_usa_df["scenario"].fillna(
"not_available"
)

# Replace "per unit" with "%" and multiply val by 100
mask_per_unit = manual_input_usa_df["unit"].str.contains("per unit")
manual_input_usa_df.loc[mask_per_unit, "unit"] = manual_input_usa_df.loc[
mask_per_unit, "unit"
].str.replace("per unit", "%")
manual_input_usa_df.loc[mask_per_unit, "value"] = (
manual_input_usa_df.loc[mask_per_unit, "value"] * 100.0
).round(input_args.num_digits)
logger.info("`per unit` replaced by `%`. Corresponding value multiplied by 100.")

# Include currency_year in unit if applicable
manual_input_usa_df["unit"] = manual_input_usa_df.apply(
lambda row: Commons.update_unit_with_currency_year(
row["unit"], row["currency_year"]
),
axis=1,
)
logger.info("`currency_year` included in `unit` column.")

# Build TechnologyCollection
manual_input_usa_base_path = pathlib.Path(
path_cwd,
"src",
"technologydata",
"package_data",
"manual_input_usa",
)
output_technologies_path = pathlib.Path(
manual_input_usa_base_path,
"technologies.json",
)
output_sources_path = pathlib.Path(
manual_input_usa_base_path,
"sources.json",
)

tech_col = build_technology_collection(
manual_input_usa_df, output_sources_path, store_source=input_args.store_source
)

logger.info("TechnologyCollection object instantiated.")
tech_col.to_json(output_technologies_path)
logger.info("TechnologyCollection object exported to json.")
12 changes: 12 additions & 0 deletions src/technologydata/package_data/manual_input_usa/sources.json
@@ -0,0 +1,12 @@
{
"sources": [
{
"title": "Energy system technology data for the US",
"authors": "Contributors to technology-data. Data source: manual_input_usa.csv",
"url": "https://github.com/PyPSA/technology-data/blob/master/inputs/US/manual_input_usa.csv",
"url_archive": null,
"url_date": null,
"url_date_archive": null
}
]
}