Skip to content

Commit 3a231d0

Browse files
Merge pull request #575 from NatLabRockies/tw/aws_pcluster_example
Tw/aws pcluster example Bypassing code rules for documentation update.
2 parents 94d9547 + c34f3ad commit 3a231d0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+815173
-9011
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
Running reV on an AWS Parallel Cluster
22
======================================
33

4-
.. include:: ../../../examples/aws_pcluster/README.rst
4+
.. include:: ../../../examples/aws_pcluster/README.md
55
:start-line: 2

examples/aws_pcluster/README.md

Lines changed: 521 additions & 0 deletions
Large diffs are not rendered by default.

examples/aws_pcluster/README.rst

Lines changed: 0 additions & 263 deletions
This file was deleted.

examples/aws_pcluster/config_collect.json

Lines changed: 0 additions & 14 deletions
This file was deleted.

examples/aws_pcluster/config_gen.json

Lines changed: 0 additions & 28 deletions
This file was deleted.

examples/aws_pcluster/config_multi-year.json

Lines changed: 0 additions & 19 deletions
This file was deleted.

examples/aws_pcluster/config_pipeline.json

Lines changed: 0 additions & 17 deletions
This file was deleted.

examples/aws_pcluster/make_project_points.py

Lines changed: 0 additions & 27 deletions
This file was deleted.
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Memory Considerations
2+
3+
When configuring execution control parameters for a reV run, there are important memory considerations to keep in mind.
4+
5+
6+
reV 5-minute NSRDB reads through HSDS have apparently been consuming more memory than our HDF5 methods. This is a quick look at memory usage between locally and remotely accessed HDF5 data reads.
7+
8+
## Local File - /kfs2/datasets/NSRDB/conus/nsrdb_conus_irradiance_2018.h5
9+
### h5py: 2000 sites, 1 time step(s) (So the full 5-minute data)
10+
- data_size=0.420 GB
11+
- mem_size=0.420 GB
12+
- mem_peak=0.420 GB
13+
### rex: 2000 sites, 1 time step(s)
14+
- data_size=0.420 GB
15+
- mem_size=0.420 GB
16+
- mem_peak=0.420 GB
17+
### h5py: 2000 sites, 6 time step(s) (So, this recreates the main half-hour NSRDB)
18+
- data_size=0.070 GB
19+
- mem_size=0.070 GB
20+
- mem_peak=0.420 GB
21+
### rex: 2000 sites, 6 time step(s)
22+
- data_size=0.070 GB
23+
- mem_size=0.070 GB
24+
- mem_peak=0.420 GB
25+
26+
## Remote File Read - /nrel/nsrdb/GOES/conus/v4.0.0/nsrdb_conus_2018.h5
27+
### h5pyd: 2000 sites, 1 time step(s)
28+
- data_size=0.420 GB
29+
- mem_size=0.421 GB
30+
- mem_peak=1.267 GB
31+
### rex: 2000 sites, 1 time step(s)
32+
- data_size=0.420 GB
33+
- mem_size=0.421 GB
34+
- mem_peak=1.267 GB
35+
### h5pyd: 2000 sites, 6 time step(s)
36+
- data_size=0.070 GB
37+
- mem_size=0.070 GB
38+
- mem_peak=1.267 GB
39+
### rex: 2000 sites, 6 time step(s)
40+
- data_size=0.070 GB
41+
- mem_size=0.070 GB
42+
- mem_peak=1.267 GB
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# -*- coding: utf-8 -*-
2+
"""Trace memory use of HSDS/HDF5 data access call.
3+
4+
Testing for differences in memory use between a local and remotely accessed
5+
HDF5 file.
6+
7+
Author: ubuntu
8+
Date: Mon Feb 9 17:49:21 UTC 2026
9+
"""
10+
import os
11+
import sys
12+
import tracemalloc
13+
14+
from pathlib import Path
15+
16+
import h5py
17+
import h5pyd
18+
19+
from rex import Resource
20+
21+
# NOTE(review): PYTHONTRACEMALLOC is only read at interpreter startup, so
# setting it here has no effect on this process -- to capture 10 frames,
# pass the frame count to tracemalloc.start(nframe) instead; confirm intent.
os.environ["PYTHONTRACEMALLOC"] = "10"

# Path of this script file itself (not its parent directory); appears unused
# below -- confirm it is still needed.
HOME = Path(__file__)
# HSDS path to the full 5-minute NSRDB CONUS file (remote read via h5pyd).
SAMPLE_REMOTE = "/nrel/nsrdb/GOES/conus/v4.0.0/nsrdb_conus_2018.h5"
# Local filesystem path to the NSRDB CONUS irradiance file (read via h5py).
SAMPLE_LOCAL = "/kfs2/datasets/NSRDB/conus/nsrdb_conus_irradiance_2018.h5"
26+
27+
28+
def trace_run(src, hfun, ntime=1, nsites=2000):
    """Trace memory for a data request.

    Prints the size of the retrieved "ghi" array alongside the current and
    peak traced memory so local (h5py), remote (h5pyd), and rex reads can
    be compared.

    Parameters
    ----------
    src : str
        Path to an HDF5 file (local filesystem path or HSDS path).
    hfun : type
        A class used to read the data in. One of h5py.File, h5pyd.File, or
        rex.Resource.
    ntime : int
        The time index stride used to pull the data. Defaults to 1 (every
        time step).
    nsites : int
        The number of sites to sample. Defaults to 2,000.
    """
    # Identify the backing package (e.g. "h5py", "h5pyd", "rex") from the
    # module of the reader class's base class.
    base = str(hfun.__base__.__module__).split(".")[0]
    print(f"{base}: {nsites} sites, {ntime} time step(s), {src} ")
    tracemalloc.start()
    try:
        with hfun(src) as ds:
            if hfun.__name__ == "Resource":
                # rex.Resource slices the dataset directly in __getitem__.
                ghi = ds["ghi", ::ntime, :nsites]
            else:
                # h5py/h5pyd return a dataset object that is then sliced.
                ghi = ds["ghi"][::ntime, :nsites]
        data_size = sys.getsizeof(ghi)
        mem_size, mem_peak = tracemalloc.get_traced_memory()
    finally:
        # BUGFIX: stop tracing so the peak does not carry over into the next
        # call. The original never stopped tracemalloc, which froze mem_peak
        # at the first run's high-water mark for all subsequent runs.
        tracemalloc.stop()
    data_size /= 1e9
    mem_size /= 1e9
    mem_peak /= 1e9
    print(f" {data_size=:.3f} GB")
    print(f" {mem_size=:.3f} GB")
    print(f" {mem_peak=:.3f} GB")
59+
60+
61+
def main(remote=True):
    """Trace memory used to read in a remote or local HDF5 file.

    Runs the h5py/h5pyd read and the rex.Resource read at two time strides
    so their memory profiles can be compared side by side.

    Parameters
    ----------
    remote : bool
        Attempt to read from NREL's remote HSDS server. Defaults to True;
        pass False to read from a local file instead.
    """
    if remote:
        src = SAMPLE_REMOTE
        hfun = h5pyd.File
    else:
        hfun = h5py.File
        src = SAMPLE_LOCAL

    # ntime=1 pulls every time step (the full 5-minute data); ntime=6
    # subsamples to a half-hour cadence, recreating the main NSRDB.
    trace_run(src, hfun, ntime=1)
    trace_run(src, Resource, ntime=1)
    trace_run(src, hfun, ntime=6)
    trace_run(src, Resource, ntime=6)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)