diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 8dfad84f..79ce380e 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -17,22 +17,21 @@ jobs: contents: write runs-on: ubuntu-latest steps: - - name: Checkout repository + - name: Checkout repository πŸ›ŽοΈ uses: actions/checkout@v5 - - name: Set up Python + - name: Set up Python 🐍 uses: actions/setup-python@v6 + with: - python-version: 3.12 - - name: Install dependencies + python-version: 3.13 + - name: Install docs dependency group πŸ› οΈ run: | python -m pip install --upgrade pip - pip install .[docs] - - name: Build documentation - run: | - cd docs - sphinx-build -b html source _build/html - - name: Deploy docs + pip install --group docs + - name: Build docs πŸ“– + run: mkdocs build + - name: Deploy docs πŸš€ if: ${{ github.event_name == 'push' }} uses: JamesIves/github-pages-deploy-action@v4 with: - folder: docs/_build/html + folder: site diff --git a/.gitignore b/.gitignore index eefc1ece..8c2f2972 100644 --- a/.gitignore +++ b/.gitignore @@ -34,6 +34,7 @@ tests/regression_tests/.outputs/* docs/doctrees docs/html/* docs/index.html +site/ # Coverage reports .coverage diff --git a/Makefile b/Makefile index 9ae38fec..8b4d7a3b 100644 --- a/Makefile +++ b/Makefile @@ -25,11 +25,9 @@ clean: ## Clean build artifacts and cache @echo "Clean complete." docs: ## Build documentation - cd docs && sphinx-build -b html source _build/html - @echo "Documentation built in docs/_build/html/" + mkdocs build + @echo "Documentation built in site/" docs-clean: ## Clean documentation build artifacts - rm -rf docs/_build/ - rm -rf docs/html/ - rm -rf docs/build/.doctrees/ + rm -rf site @echo "Documentation build artifacts cleaned." diff --git a/README.md b/README.md index 5cc2786b..fd314d7b 100644 --- a/README.md +++ b/README.md @@ -55,19 +55,6 @@ create_mesh --help See the [documentation](https://bas-amop.github.io/MeshiPhi/) for more details and advanced usage. 
-## Documentation -Sphinx is used to generate documentation for this project. The dependencies can be installed through pip: -``` -pip install sphinx sphinx_markdown_builder sphinx_rtd_theme rinohtype -``` -When updating the docs, run the following command within the MeshiPhi directory to recompile. -``` -sphinx-build -b html ./docs/source ./docs/html -``` -Sometimes the cache needs to be cleared for internal links to update. If facing this problem, run this from the MeshiPhi directory. -``` -rm -r docs/build/.doctrees/ -``` ## Required Data sources Meshiφ has been built to work with a variety of open-source atmospheric and oceanographic data sources. diff --git a/docs/.nojekyll b/docs/.nojekyll deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/source/sections/Figures/DataloaderUML.png b/docs/assets/figures/DataloaderUML.png similarity index 100% rename from docs/source/sections/Figures/DataloaderUML.png rename to docs/assets/figures/DataloaderUML.png diff --git a/docs/source/sections/Figures/FlowDiagram_MeshGraph.png b/docs/assets/figures/FlowDiagram_MeshGraph.png similarity index 100% rename from docs/source/sections/Figures/FlowDiagram_MeshGraph.png rename to docs/assets/figures/FlowDiagram_MeshGraph.png diff --git a/docs/source/sections/Figures/PolarRoute_CodeFlowDiagram.png b/docs/assets/figures/PolarRoute_CodeFlowDiagram.png similarity index 100% rename from docs/source/sections/Figures/PolarRoute_CodeFlowDiagram.png rename to docs/assets/figures/PolarRoute_CodeFlowDiagram.png diff --git a/docs/source/sections/Figures/System_overview.png b/docs/assets/figures/System_overview.png similarity index 100% rename from docs/source/sections/Figures/System_overview.png rename to docs/assets/figures/System_overview.png diff --git a/docs/source/sections/Figures/build-env-mesh.drawio.png b/docs/assets/figures/build-env-mesh.drawio.png similarity index 100% rename from docs/source/sections/Figures/build-env-mesh.drawio.png rename to 
docs/assets/figures/build-env-mesh.drawio.png diff --git a/docs/source/sections/Figures/cellbox_json.png b/docs/assets/figures/cellbox_json.png similarity index 100% rename from docs/source/sections/Figures/cellbox_json.png rename to docs/assets/figures/cellbox_json.png diff --git a/docs/source/sections/Figures/grf_example_mesh.png b/docs/assets/figures/grf_example_mesh.png similarity index 100% rename from docs/source/sections/Figures/grf_example_mesh.png rename to docs/assets/figures/grf_example_mesh.png diff --git a/docs/source/sections/Figures/mesh-build-sequence-diagram.drawio.png b/docs/assets/figures/mesh-build-sequence-diagram.drawio.png similarity index 100% rename from docs/source/sections/Figures/mesh-build-sequence-diagram.drawio.png rename to docs/assets/figures/mesh-build-sequence-diagram.drawio.png diff --git a/docs/source/sections/Figures/mesh-construct-UML.drawio.png b/docs/assets/figures/mesh-construct-UML.drawio.png similarity index 100% rename from docs/source/sections/Figures/mesh-construct-UML.drawio.png rename to docs/assets/figures/mesh-construct-UML.drawio.png diff --git a/docs/source/sections/Figures/neighbour_graph_json.png b/docs/assets/figures/neighbour_graph_json.png similarity index 100% rename from docs/source/sections/Figures/neighbour_graph_json.png rename to docs/assets/figures/neighbour_graph_json.png diff --git a/docs/source/sections/Figures/splitting_conditions.png b/docs/assets/figures/splitting_conditions.png similarity index 100% rename from docs/source/sections/Figures/splitting_conditions.png rename to docs/assets/figures/splitting_conditions.png diff --git a/docs/cli.md b/docs/cli.md new file mode 100644 index 00000000..e693a888 --- /dev/null +++ b/docs/cli.md @@ -0,0 +1,191 @@ +# Command Line Interface + +The MeshiPhi package provides CLI entry points, used to build a digital environment from a heterogeneous collection of +source data. 
This digital environment file (mesh) can then be exported to a variety of file formats for use in other +systems, such as GIS software. The produced mesh file also interfaces directly with [PolarRoute](https://github.com/bas-amop/PolarRoute), +BAS's route planning software, to provide optimal routes for a vehicle travelling through the mesh. + +## `create_mesh` + +The *create_mesh* entry point builds a digital environment file from a collection of source data, which can then be saved +to a file for visualisation or use in other software. + +```sh +create_mesh +``` + +positional arguments: + +```sh +config : A configuration file detailing how to build the digital environment. JSON parsable +``` + +The format of the required `config` file can be found in the [configuration - mesh construction](config/mesh_construction.md) section of the documentation. +There are also example configuration files available in the directory `examples/environment_config/grf_example.config.json` on GitHub. + +optional arguments: + +```sh +-v (verbose logging) +-o (set output location for mesh) +``` + +The format of the returned mesh.json file is explained in the [mesh.json file](output.md#the-meshjson-file) section of the documentation. + +## `export_mesh` + +Once a mesh has been built using the [`create_mesh`](#create_mesh) command, it can be exported to other file types for +use in other systems (such as GIS software) using the *export_mesh* command. + +```sh +export_mesh +``` + +positional arguments: + +```sh +mesh : A digital environment file. +output_location : The location to save the exported mesh. +output_format : The format to export the mesh to. 
+ + +supported output formats are: + * *.json* (default) `JSON` + * *geo.json* (collection of polygons for each cell in the mesh) `GEOJSON` + * *.tif* (rasterised mesh) `TIF` + * *.png* `PNG` + +optional arguments: + +```sh +-v : verbose logging +-o : output location +-format_conf: configuration file for output format (required for TIF export, optional for GEOJSON) +``` + +an example of the format of the ** file required for .tif export is as follows: + +```json +{ + "data_name": "elevation", + "sampling_resolution": [ + 150, + 150 + ], + "projection": "3031", + "color_conf": "path to/color_conf.txt" +} +``` + +where the variables are as follows: + * `data_name` : The name of the data to be exported. This is the name of the data layer in the mesh. + * `sampling_resolution` : The resolution of the exported mesh. This is a list of two values, the first being the x resolution and the second being the y resolution. + * `projection` : The projection of the exported mesh. This is a string of the EPSG code of the projection. + * `color_conf` : The path to the color configuration file. This is a text file containing the color scheme to be used when exporting the mesh. The format of this file is as follows: + +``` +0 240 250 160 +30 230 220 170 +60 220 220 220 +100 250 250 250 +``` + +The `color_conf.txt` file contains 4 columns per line: the data_name value and the corresponding red, green, blue value between 0 and 255. + +When using the *-format_conf* option for GEOJSON output the only variable required is the `data_name`. This specifies +which of the data layers you want to export as a single GEOJSON file. + + +## `rebuild_mesh` + +Once a mesh has been built using the [`create_mesh`](#create_mesh) command the *rebuild_mesh* command allows a user to rebuild it based on the +original configs stored within the mesh file. This is primarily useful for debugging or to update old meshes produced with an older version +of the package. 
+ +```sh +rebuild_mesh +``` + +optional arguments: + +```sh +-v : verbose logging +-o : output location +``` + +## `merge_mesh` + +When multiple compatible meshes have been created using the [`create_mesh`](#create_mesh) command, they can be merged together using the `merge_mesh` command. +This will combine the meshes into a single mesh file, replacing cellboxes in mesh1 with cellboxes in mesh2 where they overlap. + +```sh +merge_mesh +``` + +positional arguments: + +```sh +mesh1 : A digital environment file. +mesh2 : A digital environment file. +``` + +optional arguments: + +```sh +-v : verbose logging +-o : output location +-d, --directory : Flag indicating the mesh files to be merged are in a directory, not an individual file +``` + +## `plot_mesh` (GeoPlot) + +Meshes produced at any stage in the route planning process can be visualised using the [GeoPlot +library](https://github.com/bas-amop/GeoPlot). Meshes and routes can also be +plotted in other GIS software such as QGIS or ArcGIS by exporting the mesh to a common format such as .geojson or .tif +using the [`export_mesh`](#export_mesh) command. + +```sh +plot_mesh +``` + +optional arguments: + +```sh +-v : verbose logging +-o : output location +``` + +## `meshiphi_test` (for developers) + +Compares the current working branch of Meshiphi to a different git branch, and runs relevant unit and regression tests +depending on what files are different between the branches. This command should be run to ensure consistent functionality before +creating a pull request. + +Requires git to be installed, and for Meshiphi to be installed with pip in developer mode from a local git repo. + +`git clone git@github.com:bas-amop/MeshiPhi.git /path/to/meshiphi` downloads Meshiphi into `/path/to/meshiphi` + +`pip install -e /path/to/meshiphi` installs the local copy of Meshiphi in developer mode. 
+ +```sh +meshiphi_test +``` + +positional arguments: + +```sh +from_branch : Git branch that you want to test (optional, defaults to current branch) +into_branch : Git branch that you want to compare against +``` + +optional arguments: + +```sh +-r : run regression tests only +-u : run unit tests only +-s : save mesh of failed regression tests to `./pytest_meshiphi`. This is to avoid having to recompute meshes upon pytest failure +-p : plots difference between newly generated mesh and the reference mesh for easier diagnosis. Only computes on pytest failure +``` + +If `-s` or `-p` flag provided, a folder `pytest_meshiphi` will be created in your current working directory, populated with +the failing tests to aid debugging and avoid having to regenerate them manually. diff --git a/docs/config/mesh_construction.md b/docs/config/mesh_construction.md new file mode 100644 index 00000000..54c6db9f --- /dev/null +++ b/docs/config/mesh_construction.md @@ -0,0 +1,389 @@ +# Mesh Construction + +Below is a full configuration file for building an environmental mesh using synthetic data generated from Gaussian +Random Fields (GRFs). This configuration file generates the fields `SIC`, `elevation`, `thickness`, `density`, `uC`, `vC` +(currents) and `u10`, `v10` (winds). The full configuration file is available in the file [`examples/environment_config/grf_example.config.json`](https://raw.githubusercontent.com/bas-amop/MeshiPhi/refs/heads/main/examples/environment_config/grf_example.config.json) . Other example configuration files are also +available [in the GitHub repo](https://github.com/bas-amop/MeshiPhi/tree/main/examples/environment_config), including configuration files which build meshes using real datasets. + +
+Click to open full configuration +```json +{ + "region": { + "lat_min": 0, + "lat_max": 10, + "long_min": 0, + "long_max": 10, + "start_time": "2017-02-01", + "end_time": "2017-02-04", + "cell_width": 10, + "cell_height": 10 + }, + "data_sources": [ + { + "loader": "scalar_grf", + "params": { + "data_name": "SIC", + "min": 0, + "max": 100, + "seed": 16, + "offset": 5, + "splitting_conditions": [ + { + "SIC": { + "threshold": 75, + "upper_bound": 1.0, + "lower_bound": 0.0 + } + } + ], + "dataloader_name": "scalar_grf", + "downsample_factors": [ + 1, + 1 + ], + "aggregate_type": "MEAN", + "min_dp": 5, + "in_proj": "EPSG:4326", + "out_proj": "EPSG:4326", + "x_col": "lat", + "y_col": "long", + "size": 512, + "alpha": 3, + "binary": false, + "threshold": [ + 0, + 1 + ], + "multiplier": 1 + } + }, + { + "loader": "scalar_grf", + "params": { + "data_name": "elevation", + "min": -100, + "max": 50, + "seed": 30, + "splitting_conditions": [ + { + "elevation": { + "threshold": -10, + "upper_bound": 1.0, + "lower_bound": 0.0 + } + } + ], + "dataloader_name": "scalar_grf", + "downsample_factors": [ + 1, + 1 + ], + "aggregate_type": "MEAN", + "min_dp": 5, + "in_proj": "EPSG:4326", + "out_proj": "EPSG:4326", + "x_col": "lat", + "y_col": "long", + "size": 512, + "alpha": 3, + "binary": false, + "threshold": [ + 0, + 1 + ], + "multiplier": 1, + "offset": 0 + } + }, + { + "loader": "scalar_grf", + "params": { + "data_name": "thickness", + "min": 0.65, + "max": 1.4, + "seed": 44, + "dataloader_name": "scalar_grf", + "downsample_factors": [ + 1, + 1 + ], + "aggregate_type": "MEAN", + "min_dp": 5, + "in_proj": "EPSG:4326", + "out_proj": "EPSG:4326", + "x_col": "lat", + "y_col": "long", + "size": 512, + "alpha": 3, + "binary": false, + "threshold": [ + 0, + 1 + ], + "multiplier": 1, + "offset": 0 + } + }, + { + "loader": "scalar_grf", + "params": { + "data_name": "density", + "min": 850, + "max": 1000, + "seed": 40, + "dataloader_name": "scalar_grf", + "downsample_factors": [ + 1, + 1 
+ ], + "aggregate_type": "MEAN", + "min_dp": 5, + "in_proj": "EPSG:4326", + "out_proj": "EPSG:4326", + "x_col": "lat", + "y_col": "long", + "size": 512, + "alpha": 3, + "binary": false, + "threshold": [ + 0, + 1 + ], + "multiplier": 1, + "offset": 0 + } + }, + { + "loader": "vector_grf", + "params": { + "data_name": "uC,vC", + "min": 0, + "max": 1, + "seed": 21, + "dataloader_name": "vector_grf", + "downsample_factors": [ + 1, + 1 + ], + "aggregate_type": "MEAN", + "min_dp": 5, + "in_proj": "EPSG:4326", + "out_proj": "EPSG:4326", + "x_col": "lat", + "y_col": "long", + "size": 512, + "alpha": 3, + "vec_x": "uC", + "vec_y": "vC" + } + }, + { + "loader": "vector_grf", + "params": { + "data_name": "u10,v10", + "min": 0, + "max": 1, + "seed": 21, + "dataloader_name": "vector_grf", + "downsample_factors": [ + 1, + 1 + ], + "aggregate_type": "MEAN", + "min_dp": 5, + "in_proj": "EPSG:4326", + "out_proj": "EPSG:4326", + "x_col": "lat", + "y_col": "long", + "size": 512, + "alpha": 3, + "vec_x": "uC", + "vec_y": "vC" + } + } + ], + "splitting": { + "split_depth": 6, + "minimum_datapoints": 5 + } +} +``` +
+ + +The configuration file used for mesh construction contains information required to build a discretised model of the environment. +Information here dictates the region in which the mesh is constructed, the data contained within the mesh and how the +mesh is split to a non-uniform resolution. The configuration file used to generate a mesh is stored in the output mesh json +in a section titled `mesh_info`. + +The mesh configuration file contains three primary sections: + +## Region + +The region section gives detailed information for the construction of the Discrete Mesh. The main definitions are the +bounding region and temporal portion of interest (`long_min`, `lat_min`, `long_max`, `lat_max`, `start_time`, `end_time`), but +also the starting shape of the spatial grid cell boxes (`cell_width`, `cell_height`) is defined before splitting is +applied. Further detail on each parameter is given below: + +```json +"region": { + "lat_min": 0, + "lat_max": 10, + "long_min": 0, + "long_max": 10, + "start_time": "2017-02-01", + "end_time": "2017-02-04", + "cell_width": 10, + "cell_height": 10 +} +``` + +where the variables are as follows: + +* `long_min` *(float, degrees)* : Minimum Longitude Edge of the Mesh +* `long_max` *(float, degrees)* : Maximum Longitude Edge of the Mesh +* `lat_min` *(float, degrees)* : Minimum Latitude Edge of the Mesh +* `lat_max` *(float, degrees)* : Maximum Latitude Edge of the Mesh +* `start_time` *(string, 'YYYY-mm-dd')* : Start Datetime of Time averaging +* `end_time` *(string, 'YYYY-mm-dd')* : End Datetime of Time averaging +* `cell_width` *(float, degrees)* : Initial Cell Box Width prior to splitting +* `cell_height` *(float, degrees)* : Initial Cell Box Height prior to splitting + +!!! 
note + Variables `start_time` and `end_time` also support reference to system time using the keyword **TODAY** *e.g.* + +``` +"start_time": "TODAY", "end_time": "TODAY + 5" + +"start_time": "TODAY - 3", "end_time": "TODAY" +``` + +## Data Sources + +The 'data_sources' section of the configuration file defines which information will be added to the +mesh when constructed. Each item in the list of data sources represents a single dataset to be added +to the mesh. + +```json +"data_sources": [ + { + "loader": "scalar_grf", + "params": { + "data_name": "SIC", + "min": 0, + "max": 100, + "seed": 16, + "offset": 5, + "splitting_conditions": [ + { + "SIC": { + "threshold": 75, + "upper_bound": 1.0, + "lower_bound": 0.0 + } + } + ], + "dataloader_name": "scalar_grf", + "downsample_factors": [ + 1, + 1 + ], + "aggregate_type": "MEAN", + "min_dp": 5, + "in_proj": "EPSG:4326", + "out_proj": "EPSG:4326", + "x_col": "lat", + "y_col": "long", + "size": 512, + "alpha": 3, + "binary": false, + "threshold": [ + 0, + 1 + ], + "multiplier": 1 + } + }, + ... other data_sources +] +``` + +where the variables are as follows: + + +* `loader` *(string)* : The name of the data loader to be used to add this data source to the mesh; see the [abstractScalarDataloader](../dataloaders/scalar.md) for further information about the available data loaders. +* `params` *(dict)* : A dictionary containing optional parameters which may be required by the specified data loader in 'loader'. These parameters include the following: + + * `value_fill_types` *(string)* : Determines the actions taken if a cellbox is generated with no data. The possible values are either parent (which implies assigning the value of the parent cellbox), zero or nan. + * `aggregate_type` *(string)* : Specifies how the data within a cellbox will be aggregated. By default aggregation takes place by calculating the mean of all data points within the CellBoxes bounds. 
*aggregate_type* allows this default to be changed to other aggregate functions (e.g. MIN, MAX, COUNT). + * [scalar] `splitting_conditions` *(list)* : The conditions which determine if a cellbox should be split based on a scalar dataset. + * `threshold` *(float)* : The threshold above or below which CellBoxes will be sub-divided to separate the datapoints into homogeneous cells. + * `upper_bound` *(float)* : A percentage normalised between 0 and 1. A CellBox is deemed homogeneous if greater than this percentage of data points are above the given threshold. + * `lower_bound` *(float)* : A percentage normalised between 0 and 1. A CellBox is deemed homogeneous if less than this percentage of data points are above the given threshold. + * [vector] `splitting_conditions` *(list)* : The conditions which determine if a cellbox should be split based on a vector dataset. + * `curl` *(float)* : The threshold value above which a cellbox will split. Is calculated as the maximum value of **Curl(F)** within a cellbox (where **F** is the vector field). + +!!! note + Splitting conditions are applied in the order they are specified in the configuration file. + + +## Splitting + +Non-uniform mesh refinement is done by selectively sub-dividing cells. Cell +sub-division is performed whenever a cell (of any size) is determined to be +inhomogeneous with respect to a specific characteristic of interest such as +SIC or ocean depth (this characteristic is defined as a splitting condition +inside the data source's params as illustrated above). + +In the figure below, a graphical representation of the splitting +decision making process is shown. In this, the blue histogram represents an +arbitrary dataset, the orange histogram represents the values in the dataset +that are greater than the threshold (and denoted 'A' in the formulae), the +black line is the threshold value, 'UB' is the upper bound, and 'LB' is the +lower bound. 
To be specific, this is a probability distribution, and hence the +area under the orange curve 'A' is a decimal fraction of the total dataset +(which would have an area of 1). + + +![](../assets/figures/splitting_conditions.png) + +*Plot showing how cellbox homogeneity is decided* + +* If the orange area `A <= LB`, then the homogeneity condition is `CLR`. +* If the orange area `A >= UB`, then the homogeneity condition is `HOM`. +* If the orange area `LB < A < UB`, then the homogeneity condition is `HET`. + +`CLR`, `HOM`, and `HET` are used to determine if a cellbox +should be split or not. There is also a fourth homogeneity condition `MIN` +which is only triggered when the number of datapoints within the cellbox is lower +than the minimum_datapoints specified in the config. The values are checked in this order: + +* `MIN` - Do not split the cellbox +* `CLR` - Do not split the cellbox, but allow splitting if other datasets return `HET` +* `HOM` - Do not split the cellbox +* `HET` - Split the cellbox + +In the extreme case where `UB = 1` and `LB = 0`, the cellbox will +always split if there are any datapoints above or below the UB/LB respectively. +Imagining a plot similar to the figure above, + +* If the histogram is entirely blue, `return 'CLR'` +* If the histogram is entirely orange, `return 'HOM'` +* If there's both colours, `return 'HET'` + +The splitting section of the Configuration file defines the splitting parameters that are *common* across all the data sources and determines how the CellBoxes that form the +Mesh will be sub-divided based on the homogeneity of the data points contained within to form a mesh +of non-uniform spatial resolution. 
+ +```json +"splitting": { + "split_depth":6, + "minimum_datapoints":5 +} +``` + +where the variables are as follows: + +* `split_depth` *(float)* : The number of times the MeshBuilder will sub-divide each initial cellbox (subject to satisfying the splitting conditions of each data source) +* `minimum_datapoints` *(float)* : The minimum number of datapoints a cellbox must contain for each value type to be able to split diff --git a/docs/source/sections/Configuration/Configuration_overview.rst b/docs/config/overview.md similarity index 61% rename from docs/source/sections/Configuration/Configuration_overview.rst rename to docs/config/overview.md index 12f9705f..aa0dea6e 100644 --- a/docs/source/sections/Configuration/Configuration_overview.rst +++ b/docs/config/overview.md @@ -1,29 +1,19 @@ -###################################### -Configuration Overview -###################################### +# Overview In this section we outline the standard structure for the configuration file used as the starting point for generating an environmental mesh using the MeshiPhi software package. These configuration files are written in JSON, can be passed to MeshiPhi as command-line arguments or through a Python interpreter. -Example configuration files are provided in the :code:`examples/environment_config/` directory on GitHub. +Example configuration files are provided in the `examples/environment_config/` directory on GitHub. Descriptions of the configuration options for the Mesh Construction can be found in -the :ref:`Configuration - Mesh Construction` section of the documentation. +the [Configuration - Mesh Construction section](./mesh_construction.md) of the documentation. -.. toctree:: - :maxdepth: 1 - - ./Mesh_construction_config - - -Config Validation -^^^^^^^^^^^^^^^^^ +## Config Validation The configs supplied by the user are validated using a template JSON Schema. This schema checks that the correct keywords and datatypes are provided in the config JSON file. 
They also perform rudimentary checks on the values within the config to ensure that they make sense (e.g. start_time is before end_time). -.. automodule:: meshiphi.config_validation.config_validator - :members: \ No newline at end of file +::: meshiphi.config_validation.config_validator diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 00000000..5dd2de27 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,4 @@ +{% + include-markdown "../CONTRIBUTING.md" + rewrite-relative-urls=true +%} diff --git a/docs/dataloaders/adding.md b/docs/dataloaders/adding.md new file mode 100644 index 00000000..7a4b050a --- /dev/null +++ b/docs/dataloaders/adding.md @@ -0,0 +1,93 @@ +# Adding New Dataloaders + +## Adding within your own code + +If you do not wish to modify the repo to add a dataloader, you may add one into the mesh by calling the +`add_dataloader()` method of [`MeshBuilder`](../mesh_construction/classes.md#meshbuilder). + +An example of how to do this is detailed below. Assuming you're working out of a Jupyter notebook, the +basic steps would be to + +* Create a dataloader + + ```py + # Import the abstract dataloader as the base class + from meshiphi.dataloaders.scalar.abstract_scalar import ScalarDataLoader + + # Set up dataloader in the same way as the existing dataloaders + class MyDataLoader(ScalarDataLoader): + # Only user defined function required + def import_data(self, bounds): + # Read in data + if len(self.files) == 1: data = xr.open_dataset(self.files[0]) + else: data = xr.open_mfdataset(self.files) + # Trim data to boundary + data = self.trim_datapoints(bounds, data=data) + + return data + ``` + +* Create a dictionary of parameters to initialise the dataloader + + ```py + # Params formatted same way as dataloaders in config + params = { + 'files': [ + 'PATH_TO_FILE_1', + 'PATH_TO_FILE_2', + ... 
# Populate with as many files as you need + ], + 'data_name': 'my_data', + 'splitting_conditions':[ + { + 'my_data':{ + 'threshold': 0.5, + 'upper_bound': 0.9, + 'lower_bound': 0.1 + } + } + ] + } + ``` + +* Initialise an Environmental Mesh + + ```py + import json + from meshiphi import MeshBuilder + + # Config to initialise mesh from + with open('config.json', 'r') as fp: + config = json.load(fp) + + # Build a mesh from the config + mesh_builder = MeshBuilder(config) + env_mesh = mesh_builder.build_environmental_mesh() + ``` + +* Add dataloader to mesh + + ```py + # Set up bounds of data in dataloader + from meshiphi import Boundary + bounds = Boundary.from_json(config) + + # Add dataloader to mesh builder and regenerate mesh + modified_builder = mesh_builder.add_dataloader(MyDataLoader, params, bounds) + modified_mesh = modified_builder.build_environmental_mesh() + ``` + + +## Contributing a dataloader to the repository + +Each dataloader is to be implemented as a separate object for the environmental mesh to interface with. +The general workflow for creating a new dataloader is as follows: + +* Choose an appropriate dataloader type (see [Dataloader Types](overview.md#dataloader-types)). +* Create a new file under `meshiphi.DataLoaders/{dataloader-type}` with an appropriate name. +* Create `import_data()` and (optionally) `add_default_params()` methods. Examples of how to do this are shown on the [abstractScalar](scalar.md#abstract-scalar-base-class) and [abstractVector](vector.md#abstract-vector-base-class) pages. +* Add a new entry to the dataloader factory object, within `meshiphi.Dataloaders/Factory.py`. Instructions on how to do so are shown in [dataloader-factory](factory.md). + +After performing these actions, the dataloader should be ready to go. It is useful for debugging purposes +to create the dataloader object from within `meshiphi.Dataloaders/Factory.py` (e.g. within +`if __name__=='__main__':` ) and test its functionality before deploying it. 
\ No newline at end of file diff --git a/docs/dataloaders/factory.md b/docs/dataloaders/factory.md new file mode 100644 index 00000000..e287f58a --- /dev/null +++ b/docs/dataloaders/factory.md @@ -0,0 +1,67 @@ +# Dataloader Factory + +The dataloader factory produces dataloader objects based off of parameter +inputs provided in the config file. The parameters needed in the config are +defined in the `get_dataloader()` method of the factory. At the very +least, a name must be provided to select the dataloader from all those that +are available. + +## Adding New Dataloader to Factory + +Two actions must be performed to add a new dataloader to the Factory object. +Optionally, a third may be added if you want to add a new default value for +a parameter the dataloader requires. The actions are: + +* Import the dataloader +* Add an entry to the `dataloader_requirements` dictionary + +## Example + +In this example, a new scalar dataloader `myScalarDataloader` has been created, and +is located at `meshiphi.Dataloaders/Scalar/myScalarDataloader.py`. + +The only parameter required by this dataloader is a file to read data from. 'files' +is passed as a mandatory parameter, as 'file' and 'folder' both get translated into +a list of files, and stored in params under the key `files`: + +```py +# Add new import statement for Factory to read +from meshiphi.Dataloaders.Scalar.myScalarDataloader import myScalarDataloader + +... + +class DataLoaderFactory: + ... + def get_dataloader(self, name, bounds, params, min_dp=5): + ... + dataloader_requirements = { + ... + # Add new dataloaders + 'myscalar': (myScalarDataloader, ['files']) + ... + ... + ... +``` + +To call this dataloader, add an entry in the `config.json` +file used to generate the mesh. 
Alternatively, add a folder, or a list of +individual files: + +```json +{ + "loader": "myscalar", + "params": { + "file": "PATH_TO_DATA_FILE" # For a single file + "folder": "PATH_TO_FOLDER" # For a folder, must have trailing '/' + "files":[ # For a list of individual files + "PATH_TO_FILE_1", + "PATH_TO_FILE_2", + ... + ] + } +} +``` + +## Dataloader Factory Object + +::: meshiphi.dataloaders.factory \ No newline at end of file diff --git a/docs/dataloaders/interface.md b/docs/dataloaders/interface.md new file mode 100644 index 00000000..9d228349 --- /dev/null +++ b/docs/dataloaders/interface.md @@ -0,0 +1,7 @@ +# Dataloader Interface + +Shows how the mesh generation code may interact with the dataloaders. In operation, +only `get_hom_condition()` and `get_value()` are needed realistically. Other methods are +implemented in the [abstractScalar](scalar.md#abstract-scalar-base-class) and [abstractVector](vector.md#abstract-vector-base-class) dataloaders. + +::: meshiphi.dataloaders.dataloader_interface diff --git a/docs/dataloaders/lut.md b/docs/dataloaders/lut.md new file mode 100644 index 00000000..a34e9820 --- /dev/null +++ b/docs/dataloaders/lut.md @@ -0,0 +1,74 @@ +# Look Up Table Dataloaders + +## Abstract LUT Base Class + +The Abstract Base Class of the Look Up Table dataloaders holds most of the +functionality that would be needed to manipulate the data to work +with the mesh. When creating a new dataloader, the user must define +how to open the data files, and what methods are required to manipulate +the data into a standard format. + +Documentation for the abstract `LutDataLoader` is available in the [API documentation](../../autoapi/meshiphi/dataloaders/lut/abstract_lut). + +## LUT Dataloader Examples + +Creating a LUT dataloader is almost identical to creating a +[scalar dataloader](scalar.md#abstract-scalar-base-class). The key differences +are that the `LUTDataLoader` abstract base class must be used, and +regions are defined by Shapely polygons. 
Data is imported and saved as +GeoPandas dataframes, holding a polygon and an associated value. + +## Included LUT Dataloaders + +### Density + +Density values were taken from the paper 'Thickness distribution of Antarctic sea ice' Worby, A.P. _et al._ (2008). This paper took a density model from the paper 'Structure, principal properties and strength of Antarctic sea ice' (Buynitskiy, V.K.). + +Name in config: `'density'` + +Class documentation: [DensityDataLoader](../../autoapi/meshiphi/dataloaders/lut/DensityDataLoader) + +### LUT CSV + +The LUT CSV dataloader is designed to take any `.csv` file and cast +it into a data source for mesh construction. It was primarily used in testing +for loading dummy data to test performance. As such, there is no data source +for this dataloader. The CSV must have two columns: 'geometry' and 'data_name'. +'geometry' must have that title, and is a shapely wkt string. data_name can have +any name, and is just the value that is associated with the polygon. + +Name in config: `'lut_csv'` + +Class documentation: [LutCSV](../../autoapi/meshiphi/dataloaders/lut/lut_csv) + +### LUT GeoJSON + +The LUT GeoJSON dataloader is designed to take any GeoJSON file and cast +it into a data source for mesh construction. It was primarily used in testing +for loading dummy data to test performance. When using this dataloader, a value +should be provided in the mesh config file that specifies the value and data_name +that the polygons save. The keyword in the config params is 'value'. + +Name in config: `'lut_geojson'` + +Class documentation: [LutGeoJSON](../../autoapi/meshiphi/dataloaders/lut/lut_geojson) + +### Scotland NCMPA + +GeoJSON files are provided by the Scottish government for Nature Conservation Marine Protected Areas. 
+ +Data can be downloaded from + +Name in config: `'scotland_ncmpa'` + +Class documentation: [ScotlandNCMPA](../../autoapi/meshiphi/dataloaders/lut/scotland_ncmpa) + +### Thickness Dataloader + +Thickness values were taken from the paper 'Thickness distribution of Antarctic sea ice' Worby, A.P. _et al._ (2008) + +Data is generated using the values from this paper, and so no data file is available for download. + +Name in config: `'thickness'` + +Class documentation: [ThicknessDataLoader](../../autoapi/meshiphi/dataloaders/lut/thickness) diff --git a/docs/source/sections/Dataloaders/overview.rst b/docs/dataloaders/overview.md similarity index 53% rename from docs/source/sections/Dataloaders/overview.rst rename to docs/dataloaders/overview.md index ec88fe71..e6d52769 100644 --- a/docs/source/sections/Dataloaders/overview.rst +++ b/docs/dataloaders/overview.md @@ -1,23 +1,7 @@ -.. _dataloaders-overview: - -******************* -Dataloader Overview -******************* - -.. toctree:: - :maxdepth: 1 - :glob: - - ./DataLoaderInterface - ./Factory - ./scalar/index - ./vector/index - ./lut/index - ./AddingDataloaders +# Dataloader Overview -Section Overview -################ +## Section Overview In this section, we discuss the dataloader objects which retrieve data within a spatial (and optionally temporal) boundary for the Environmental Mesh. @@ -26,69 +10,64 @@ that is interpretable by the mesh construction code. This may include reprojecti to mercator (EPSG:4326) projection, or downsampling to reduce the computing resources needed to process large datasets. Typically, raw data is stored as a NetCDF or CSV file, however this can be whatever the user needs, so long as they are cast into either -:code:`pandas.DataFrame`'s or :code:`xarray.Dataset`'s for the +`pandas.DataFrame`'s or `xarray.Dataset`'s for the `Abstract Dataloaders`_ to work with. -.. 
figure:: ../Figures/DataloaderUML.png - :align: center - :width: 700 +![](../assets/figures/DataloaderUML.png) +*The dataloader subsystem* - *UML Diagram detailing the dataloader subsystem* - -Dataloader Types -================ +## Dataloader Types There are three main types of dataloaders that are implemented as abstract classes: Scalar, Vector, and Look-Up Table. - **Scalar dataloaders** are to be used on scalar datasets; i.e. variables with a single value per latitude/longitude(/time) coordinate. Examples of this are bathymetry, sea ice concentration, etc... While the raw datasets may contain more than one variable (a common example being the existence of values and errors in the same file), -these *MUST* be cut down to just coordinates, and a single variable, in order to work correctly with the :ref:`abstractScalar` dataloader. -To read more on how to implement these, follow instructions in :ref:`Adding Dataloaders page` and the :ref:`abstract scalar dataloader page`. +these *MUST* be cut down to just coordinates, and a single variable, in order to work correctly with the [abstractScalar](scalar.md#abstract-scalar-base-class) dataloader. +To read more on how to implement these, follow instructions in [Adding Dataloaders](adding.md) page and the [abstract scalar dataloader](scalar.md#abstract-scalar-base-class) page. **Vector dataloaders** are to be used on vector datasets; i.e. variables with multi-dimensional values per latitude/longitude(/time) coordinate. Examples of this are ocean currents, wind, etc... The datasets will have multiple data variables, and should be cut down to include only coordinates ('lat', 'long', and optionally 'time'), and the values for each dimensional component of the variable. This will generally be two dimensions, -however the :ref:`abstractVector` dataloader should be flexible to n-dimensional data. 
-Rigor should be taken when testing these dataloaders to ensure that the outputs of :code:`get_value()` method of these dataloaders produces outputs that make sense. -To read more on how to implement these, follow instructions in :ref:`Adding Dataloaders page` and :ref:`abstract vector dataloader page`. +however the [abstractVector](vector.md#abstract-vector-base-class) dataloader should be flexible to n-dimensional data. +Rigor should be taken when testing these dataloaders to ensure that the outputs of `get_value()` method of these dataloaders produces outputs that make sense. +To read more on how to implement these, follow instructions in [Adding Dataloaders](adding.md) page and [abstract vector](vector.md#abstract-vector-base-class) dataloader page. **Look-up Table Dataloaders** are to be used on datasets where boundaries define a value. Real data is always preferred to this method, however in the case where there is no data, the LUT can provide an alternative. Examples of this include ice density, exclusion zones, and marine-protected areas. For these examples, weather conditions dictate their values, and these weather conditions can be localised to specific areas. -To read more on how to implement these, follow instructions in :ref:`Adding Dataloaders page` and :ref:`abstract LUT dataloader page`. +To read more on how to implement these, follow instructions in [Adding Dataloaders](adding.md) page and [abstract LUT](lut.md#abstract-lut-base-class) dataloader page. 
+ +## Abstract Dataloaders -Abstract Dataloaders -==================== To look at specific abstract dataloaders, use the following links: -- :ref:`abstract-scalar-dataloader` -- :ref:`abstract-vector-dataloader` -- :ref:`abstract-lut-dataloader` +- [abstract scalar dataloader](scalar.md#abstract-scalar-base-class) +- [abstract vector dataloader](vector.md#abstract-vector-base-class) +- [abstract LUT dataloader](lut.md#abstract-lut-base-class) These are the templates to be used when implementing new dataloaders into MeshiPhi. They have been split into three separate categories: Scalar, Vector, and LUT, detailed in `Dataloader Types`_. The abstract classes generalise the methods used by each dataloader type to produce outputs -that the Environmental Mesh can retrieve via the :ref:`dataloader interface`. -Scalar and Vector dataloaders are flexible in that they can store and process data as both :code:`xarray.Dataset`'s or -:code:`pandas.DataFrame`'s (and by extension, :code:`dask.DataFrames`'s). -When creating your own, :code:`dask` and :code:`xarray` should be utilised as much as possible to +that the Environmental Mesh can retrieve via the [dataloader interface](interface.md). +Scalar and Vector dataloaders are flexible in that they can store and process data as both `xarray.Dataset`s or +`pandas.DataFrame`s (and by extension, `dask.DataFrame`s). +When creating your own, `dask` and `xarray` should be utilised as much as possible to reduce memory consumption. LUT dataloaders are flexible in that they can read in CSV's, GeoJSON's, or Shapefiles, but are otherwise stored internally as GeoPandas dataframes. -The abstract base classes define the :code:`__init__()` function to have the following process: +The abstract base classes define the `__init__()` method to have the following process: -#. Read in params from config -#. Add params from :code:`self.add_default_params()`, defined by user when creating a dataloader -#. 
Downsample data if required and if loaded as :code:`xarray.Dataset` -#. Reproject data if required -#. Trim datapoints to initial boundary -#. Rename data column name if defined in params \ No newline at end of file +* Read in params from config +* Add params from `self.add_default_params()`, defined by user when creating a dataloader +* Downsample data if required and if loaded as `xarray.Dataset` +* Reproject data if required +* Trim datapoints to initial boundary +* Rename data column name if defined in params \ No newline at end of file diff --git a/docs/dataloaders/scalar.md b/docs/dataloaders/scalar.md new file mode 100644 index 00000000..653e57cd --- /dev/null +++ b/docs/dataloaders/scalar.md @@ -0,0 +1,356 @@ +# Scalar Dataloaders + +## Abstract Scalar Base Class + +The Abstract Base Class of the scalar dataloaders holds most of the +functionality that would be needed to manipulate the data to work +with the mesh. When creating a new dataloader, the user must define +how to open the data files, and what methods are required to manipulate +the data into a standard format. + +Documentation for the abstract `ScalarDataLoader` is available in the [API documentation](../../autoapi/meshiphi/dataloaders/scalar/abstract_scalar). + +## Scalar Dataloader Examples + +Data must be imported and saved as an xarray.Dataset, or a pandas.DataFrame object. 
+Below is a simple example of how to load in a NetCDF file: + +```py +from meshiphi.dataloaders.scalar.abstract_scalar import ScalarDataLoader +import xarray as xr +import logging + +class MyDataLoader(ScalarDataLoader): + + def import_data(self, bounds): + logging.debug("Importing my data...") + # Open Dataset + if len(self.files) == 1: data = xr.open_dataset(self.files[0]) + else: data = xr.open_mfdataset(self.files) + + # Rename coordinate columns to 'lat', 'long', 'time' if they aren't already + data = data.rename({'lon':'long'}) + + # Limit to initial boundary + data = self.trim_data(bounds, data=data) + + return data +``` + +Sometimes there are parameters that are constant for a data source, but are not +constant for all data sources. Default values are defined in the dataloader's `add_default_params()` method. +Below is an example of setting default parameters for reprojection of a dataset: + +```py +class MyDataLoader(ScalarDataLoader): + def add_default_params(self, params): + # Add all the regular default params that scalar dataloaders have + params = super().add_default_params(params) # This line MUST be included + + # Define projection of dataset being imported + params['in_proj'] = 'EPSG:3412' + # Define projection required by output + params['out_proj'] = 'EPSG:4326' # default is EPSG:4326, so strictly + # speaking this line is not necessary + + # Coordinates in dataset that will be reprojected into long/lat + params['x_col'] = 'x' # Becomes 'long' + params['y_col'] = 'y' # Becomes 'lat' + + return params + + def import_data(self, bounds): + # Open Dataset + data = xr.open_mfdataset(self.files) + + # Can't easily determine bounds of data in wrong projection, so skipping for now + return data +``` + +## Included Scalar Dataloaders + +The following scalar dataloaders are included in MeshiPhi. + +See the package [API Reference](../../autoapi/meshiphi/dataloaders/scalar) section of the docs for details. 
+ +### AMSR + +The AMSR (Advanced Microwave Scanning Radiometer) dataset is publicly +available and provides Sea Ice Concentration scans of the earth's oceans. +It is produced by researchers at the University of Bremen. + +The AMSR dataloader is currently the only 'standalone' dataloader, in that it +is defined independently of the abstract base class. This is due to issues +with `pandas` calculating mean values differently depending on how the +data is loaded. This caused issues with the regression tests passing. +This issue will be rectified soon by updating the regression tests. + +Data can be downloaded from the [University of Bremen Sea Ice Data Archive](https://seaice.uni-bremen.de/data-archive/). + +Name in config: `'amsr'` + +Class documentation: [AMSRDataLoader](../../autoapi/meshiphi/dataloaders/scalar/amsr) + +### Baltic Sea Ice + +Baltic sea ice concentration values are provided by the Finnish Meteorological Institute (FMI). +From their webpage: + + The operational sea ice service at FMI provides ice parameters over the Baltic Sea. + The parameters are based on ice chart produced on daily basis during the + Baltic Sea ice season and show the ice concentration in a 1 km grid. + +Data can be downloaded from + +Name in config: `'baltic_sic'` + +Class documentation: [BalticSeaIceDataLoader](../../autoapi/meshiphi/dataloaders/scalar/baltic_sea_ice) + +### Binary GRF + +The binary GRF dataloader is the same as the [Scalar GRF](#scalar-grf). +The only difference is that instead of returning a dataframe that consists +of values between the min/max set in the config, this dataframe will contain +only True/False. It is useful for generating land masks. + +Default parameters for binary/mask GRF dataloader: + +```json +{ + "loader": "binary_grf", + "params":{ + "data_name": "data", # - Name of the data column + "seed": None, # - Seed for random number generator. Must + # be int or None. 
None sets a random seed + "size": 512, # - Number of datapoints per lat/long axis + "alpha": 3, # - Power of the power-law momentum + # distribution used to generate GRF + "min": 0, # - Minimum value of GRF + "max": 1, # - Maximum value of GRF + "binary": True, # - Flag specifying this GRF is a binary mask + "threshold": 0.5 # - Value around which mask values are set. + # Below this, values are set to False + # Above this, values are set to True + } +} +``` + +Name in config: `'binary_grf'` + +See [scalar GRF](#scalar-grf) for documentation on the dataloader + + + +### BSOSE Depth + +B-SOSE (Biogeochemical Southern Ocean State Estimate solution) provide a publicly available dataset that +hosts (amongst other products) sea ice concentration (SIC) of the southern ocean. Their SIC product provides +a 'depth' value, which this dataloader ingests. +BSOSE is an extension of the SOSE project led by Mazloff at the Scripps Institution of Oceanography. + +From their website: +> The Southern Ocean State Estimate (SOSE) is a model-generated best fit to Southern Ocean +> observations. As such, it provides a quantitatively useful climatology of the mean-state +> of the Southern Ocean. + +Name in config: `'bsose_depth'` + +Data can be downloaded from + +Class documentation: [BSOSEDepthDataLoader](../../autoapi/meshiphi/dataloaders/scalar/bsose_depth) + +!!! note + This dataloader may not work "as is" for new data downloaded, it has been internally collated into a more easily ingestable format. + +### BSOSE Sea Ice + +B-SOSE (Biogeochemical Southern Ocean State Estimate solution) provide a publicly available dataset that +hosts (amongst other products) sea ice concentration of the southern ocean. It is an extension of the +SOSE project led by Mazloff at the Scripps Institution of Oceanography. + +From their website: +> The Southern Ocean State Estimate (SOSE) is a model-generated best fit to Southern Ocean +> observations. 
As such, it provides a quantitatively useful climatology of the mean-state +> of the Southern Ocean. + +Data can be downloaded from + +Name in config: `'bsose_sic'` + +Class documentation: [BSOSESeaIceDataLoader](../../autoapi/meshiphi/dataloaders/scalar/bsose_sea_ice) + +!!! note + This dataloader may not work as is for new data downloaded, it has been internally collated into a more easily ingestable format. + + +### ECMWFSigWaveHeight + +The ECMWF (European Centre for Medium-Range Weather Forecasts) are both a +research institute and a 24/7 operational service, producing global numerical +weather predictions and other data for their Member and Co-operating States +and the broader community. The Centre has one of the largest supercomputer +facilities and meteorological data archives in the world. Other strategic +activities include delivering advanced training and assisting the WMO in +implementing its programmes. +(description taken from ) + +!!! note + This dataloader is for the grib2 files. + +Data can be downloaded from + +Name in config: `'ecmwf_sig_wave_height'` + +Class documentation: [ECMWFSigWaveHeightDataLoader](../../autoapi/meshiphi/dataloaders/scalar/ecmwf_sig_wave_height) + +### ERA5 Dataloaders + +ERA5 is a family of data products produced by the European Centre for Medium-Range Weather Forecasts (ECMWF). +It is the fifth generation ECMWF atmospheric reanalysis of the global climate covering the period from January 1950 to present. + +From their website: + +> ERA5 provides hourly estimates of a large number of atmospheric, +> land and oceanic climate variables. The data cover the Earth on a +> 30km grid and resolve the atmosphere using 137 levels from the +> surface up to a height of 80km. ERA5 includes information about +> uncertainties for all variables at reduced spatial and temporal resolutions. 
+ +Instructions for how to download their data products are +available from + +Variables, their names in config and class documentation: + +* Maximum Wave Height: `'era5_max_wave_height'` - [ERA5MaxWaveHeightDataLoader](../../autoapi/meshiphi/dataloaders/scalar/era5_max_wave_height) +* Significant Wave Height: `'era5_sig_wave_height'` - [ERA5SigWaveHeightDataLoader](../../autoapi/meshiphi/dataloaders/scalar/era5_sig_wave_height) +* Mean Wave Direction Dataloader: `'era5_wave_dir'` - [ERA5MeanWaveDirDataLoader](../../autoapi/meshiphi/dataloaders/scalar/era5_wave_dir) +* Mean Wave Period: `'era5_wave_period'` - [ERA5WavePeriodDataLoader](../../autoapi/meshiphi/dataloaders/scalar/era5_wave_period) +* Wind Direction: `'era5_wind_dir'` - [ERA5WindDirDataLoader](../../autoapi/meshiphi/dataloaders/scalar/era5_wind_dir) +* Wind Magnitude: `'era5_wind_mag'` - [ERA5WindMagDataLoader](../../autoapi/meshiphi/dataloaders/scalar/era5_wind_mag) + +### GEBCO + +The General Bathymetric Chart of the Oceans (GEBCO) is a publicly available +bathymetric chart of the Earth's oceans. It is a common resource used by +ocean scientists, amongst others. + +Data can be downloaded from + +Name in config: `'gebco'` + +Class documentation: [GEBCODataLoader](../../autoapi/meshiphi/dataloaders/scalar/gebco) + +### IceNet + +IceNet is a seasonal sea ice forecasting tool being developed by researchers +at the British Antarctic Survey. From the website: + +> IceNet is a probabilistic, deep learning sea ice forecasting system +> developed by an international team and led by British Antarctic Survey +> and The Alan Turing Institute [Andersson et al., 2021]. IceNet has been +> trained on climate simulations and observational data to forecast the +> next 6 months of monthly-averaged sea ice concentration maps. + +Data for IceNet V1 is available from +Data for IceNet V2 is not publicly available. 
+ +Name in config: `'icenet'` + +Class documentation: [IceNetDataLoader](../../autoapi/meshiphi/dataloaders/scalar/icenet) + + +### MODIS + +Moderate Resolution Imaging Spectroradiometer (MODIS) is a satellite-borne +instrument developed by NASA. + +From their website: +> MODIS are viewing the entire Earth's surface every 1 to 2 days, +> acquiring data in 36 spectral bands, or groups of wavelengths. + +Information on where to download their data products can be found at + +Name in config: `'modis'` + +Class documentation: [MODISDataLoader](../../autoapi/meshiphi/dataloaders/scalar/modis) + + +### Scalar CSV + +The scalar CSV dataloader is designed to take any `.csv` file and cast +it into a data source for mesh construction. It was primarily used in testing +for loading dummy data to test performance. As such, there is no data source +for this dataloader. + +Name in config: `'scalar_csv'` + +Class documentation: [ScalarCSVDataLoader](../../autoapi/meshiphi/dataloaders/scalar/scalar_csv) + +### Scalar GRF + +Produces a gaussian random field of scalar values, useful for producing +artificial, yet somewhat realistic values for real-world variables. + +Name in config: `'scalar_grf'` + +Can be used to generate [binary masks](#binary-grf). + +For vector fields, see [vector GRF](vector.md#vector-grf). + +Class documentation: [ScalarGRFDataLoader](../../autoapi/meshiphi/dataloaders/scalar/scalar_grf) + +Default parameters for scalar GRF dataloader. + +```json +{ + "loader": "scalar_grf", + "params":{ + "data_name": "data", # - Name of the data column + "seed": None, # - Seed for random number generator. Must + # be int or None. 
None sets a random seed + "size": 512, # - Number of datapoints per lat/long axis + "alpha": 3, # - Power of the power-law momentum + # distribution used to generate GRF + "binary": False, # - Flag specifying this GRF isn't a binary mask + "threshold": [0, 1], # - Caps of min/max values to ensure normalising + # not skewed by outlier in randomised GRF + "min": -10, # - Minimum value of GRF + "max": 10, # - Maximum value of GRF + "multiplier": 1, # - Multiplier for entire dataset + "offset": 0 # - Offset for entire dataset + } +} +``` + +!!! note + min/max are set BEFORE multiplier and offset are used. The actual values for the min and max are + + * `actual_min = multiplier * min + offset` + * `actual_max = multiplier * max + offset` + +### Shape Dataloader + +The shape dataloader is designed to create abstract shapes with well known +boundaries, and cast it into a data source for mesh construction. It was primarily +used in testing to debug cellbox generation. As such, there is no data source +for this dataloader. + +Class documentation: [ShapeDataLoader](../../autoapi/meshiphi/dataloaders/scalar/shape) + +### Visual_iced + +Visual_iced is a dataloader for .tiff images, which are outputs from the visual_iced library +developed by Martin Rogers at the British Antarctic Survey's AI Lab. These visual_iced +images are ice/water binary files, generated from a combination of MODIS and SAR +satellite imagery. + +In the source data, 0s are representative of open water, and 1s are representative of +ice. In the dataloader, we map these values to sea ice concentration, in the range of 0 to 100. +Values between 0 and 100 are generated by the aggregation of the 0s and 1s within each cell. + +!!! note + The visual_iced dataloader only supports loading in single files, as the visual_iced datasets are not temporally continuous within a given boundary. 
+ +Name in config: `'visual_iced'` + +Class documentation: [VisualIcedDataLoader](../../autoapi/meshiphi/dataloaders/scalar/visual_iced) diff --git a/docs/dataloaders/vector.md b/docs/dataloaders/vector.md new file mode 100644 index 00000000..0893044b --- /dev/null +++ b/docs/dataloaders/vector.md @@ -0,0 +1,243 @@ +# Vector Dataloaders + +## Abstract Vector Base Class + +The Abstract Base Class of the vector dataloaders holds most of the +functionality that would be needed to manipulate the data to work +with the mesh. When creating a new dataloader, the user must define +how to open the data files, and what methods are required to manipulate +the data into a standard format. + +Documentation for the abstract `VectorDataLoader` is available in the [API documentation](../../autoapi/meshiphi/dataloaders/vector/abstract_vector). + + +## Examples + +Creating a vector dataloader is almost identical to creating a +[scalar dataloader](scalar.md#abstract-scalar-base-class). The key differences +are that the `VectorDataLoader` abstract base class must be used, and that +the `data_name` is a comma separated string of the vector component names. +e.g. a dataloader storing a vector with column names `uC` and +`vC` will have an attribute `self.data_name = 'uC,vC'` +Data must be imported and saved as an `xarray.Dataset`, or a +`pandas.DataFrame` object. 
Below is a simple example of how to load in a +NetCDF file: + +```py +from meshiphi.dataloaders.vector.abstract_vector import VectorDataLoader +import xarray as xr +import logging + +class MyDataLoader(VectorDataLoader): + def import_data(self, bounds): + logging.debug("Importing my data...") + # Open Dataset + logging.debug(f"- Opening file {self.file}") + data = xr.open_dataset(self.file) + + # Rename coordinate columns to 'lat', 'long', 'time' if they aren't already + data = data.rename({'lon':'long'}) + + # Limit to initial boundary + data = self.trim_data(bounds, data=data) + + return data +``` + +Similar to scalar data loaders, sometimes there are parameters that are constant +for a data source, but are not constant for all data sources. Default values may +be defined either in the dataloader factory, or within the dataloader itself. +Below is an example of setting default parameters for reprojection of a dataset: + +```py +class MyDataLoader(VectorDataLoader): + def add_default_params(self, params): + # Add all the regular default params that vector dataloaders have + params = super().add_default_params(params) # This line MUST be included + + # Define projection of dataset being imported + params['in_proj'] = 'EPSG:3412' + # Define projection required by output + params['out_proj'] = 'EPSG:4326' # default is EPSG:4326, so strictly + # speaking this line is not necessary + + # Coordinates in dataset that will be reprojected into long/lat + params['x_col'] = 'x' # Becomes 'long' + params['y_col'] = 'y' # Becomes 'lat' + + return params + + def import_data(self, bounds): + # Open Dataset + data = xr.open_mfdataset(self.file) + + # Can't easily determine bounds of data in wrong projection, so skipping for now + return data +``` + +## Included Vector Dataloaders + +The following vector dataloaders are included in MeshiPhi. + +### Baltic Currents + +Baltic current values are provided by the Finnish Meteorological Institute (FMI). 
+From their webpage: + +> This CMEMS Baltic Sea Physical Reanalysis product provides a physical reanalysis +> for the whole Baltic Sea area, inclusive the Transition Area to the North Sea. +> The surface variables are available every hour and include sea surface height, +> ice concentration and total ice thickness. The other variables, available as daily +> and monthly means, are salinity, temperature, horizontal current components, +> mixed layer depth, bottom salinity and bottom temperature. + +Data can be downloaded from + +Name in config: `'baltic_currents'` + +Class documentation: [BalticCurrentDataLoader](../../autoapi/meshiphi/dataloaders/vector/baltic_current) + +### DUACS Currents + +DUACS is a European operational multi-mission production system of altimeter data that provides (amongst other products) +global ocean current vectors. The system was developed by CNES/CLS and data is available from the copernicus marine data +service. + +From their website: +> Altimeter satellite gridded Sea Level Anomalies (SLA) computed with respect to a twenty-year 1993, 2012 mean. The SLA +> is estimated by Optimal Interpolation, merging the L3 along-track measurement from the different altimeter missions +> available. Part of the processing is fitted to the Global Ocean. The product gives additional variables (i.e. +> Absolute Dynamic Topography and geostrophic currents). + +Near real-time data can be downloaded from `here `_. + +Reanalysis data can be downloaded from `here. `_ + +Name in config: `'duacs_currents'` + +Class documentation: [DuacsCurrentDataLoader](../../autoapi/meshiphi/dataloaders/vector/duacs_current) + +### ERA5 Wave Direction + +ERA5 is a family of data products produced by the European Centre for Medium-Range Weather Forecasts (ECMWF). +It is the fifth generation ECMWF atmospheric reanalysis of the global climate covering the period from January 1950 to present. 
+ +From their website: + +> ERA5 provides hourly estimates of a large number of atmospheric, +> land and oceanic climate variables. The data cover the Earth on a +> 30km grid and resolve the atmosphere using 137 levels from the +> surface up to a height of 80km. ERA5 includes information about +> uncertainties for all variables at reduced spatial and temporal resolutions. + +Instructions for how to download their data products are +available from + +This dataloader takes the mean wave direction variable, which gives the direction the waves are coming from as an angle +from north in degrees, and converts it to a unit vector with u and v components. + +Name in config: `'era5_wave_direction'` + +Class documentation: [ERA5WaveDirectionLoader](../../autoapi/meshiphi/dataloaders/vector/era5_wave_direction_vector) + +### ERA5 Wind + +See above for information about ERA5. + +Name in config: `'era5_wind'` + +Class documentation: [ERA5WindDataLoader](../../autoapi/meshiphi/dataloaders/vector/era5_wind) + +### North Sea Currents + +North Atlantic Ocean currents are provided by the Proudman Oceanographic Laboratory +Coastal-Ocean Modelling System (POLCOMS). Their dataset was generated by the UK National +Oceanography Centre, Liverpool. + +More information on where to download the data is +available + +Name in config: `'northsea_currents'` + +Class documentation: [NorthSeaCurrentDataLoader](../../autoapi/meshiphi/dataloaders/vector/north_sea_current) + +### ORAS5 Currents + +Ocean Reanalysis System 5 (ORAS5) is a publicly available dataset providing +estimated values for many different ocean parameters, including ocean currents. + +From their website: + +> This dataset provides global ocean and sea-ice reanalysis +> (ORAS5: Ocean Reanalysis System 5) monthly mean data prepared by +> the European Centre for Medium-Range Weather Forecasts (ECMWF) +> OCEAN5 ocean analysis-reanalysis system. 
This system comprises 5 ensemble +> members from which one member is published in this catalogue entry. + +Data can be downloaded from + +Name in config: `'oras5_currents'` + +Class documentation: [ORAS5CurrentDataLoader](../../autoapi/meshiphi/dataloaders/vector/oras5_current) + +### SOSE Currents + +Southern Ocean State Estimate (SOSE) is a publicly available dataset that provides (amongst other products) +ocean current vectors of the southern ocean. It is a project led by Mazloff at the Scripps Institution of Oceanography. + +From their website: +> The Southern Ocean State Estimate (SOSE) is a model-generated best fit to Southern Ocean +> observations. As such, it provides a quantitatively useful climatology of the mean-state +> of the Southern Ocean. + +Data can be downloaded from `here `_ + +Name in config: `'sose'` + +Class documentation: [SOSEDataLoader](../../autoapi/meshiphi/dataloaders/vector/sose) + +!!! note + This dataloader may not work as is for new data downloaded, it has been internally collated into a more easily ingestable format. + +### Vector CSV + +The vector CSV dataloader is designed to take any `.csv` file and cast +it into a data source for mesh construction. It was primarily used in testing +for loading dummy data to test performance. As such, there is no data source +for this dataloader. + +Name in config: `'vector_csv'` + +Class documentation: [VectorCSVDataLoader](../../autoapi/meshiphi/dataloaders/vector/vector_csv) + +### Vector GRF + +Produces a gaussian random field of vector values, useful for producing +artificial, yet somewhat realistic values for real-world variables. +Values are broken down into `x` and `y` components, and saved in two +columns in the final dataframe. + +Name in config: `'vector_grf'` + +Class documentation: [VectorGRFDataLoader](../../autoapi/meshiphi/dataloaders/vector/vector_grf) + +Can be used to generate [binary masks](scalar.md#binary-grf). + +For scalar fields, see [scalar GRF](scalar.md#scalar-grf). 
+ +Default parameters for vector GRF dataloader: + +```json +{ + "loader": "vector_grf", + "params":{ + "vec_x": "uC", # - Name of the first data column + "vec_y": "vC", # - Name of the second data column + "seed": None, # - Seed for random number generator. Must + # be int or None. None sets a random seed + "size": 512, # - Number of datapoints per lat/long axis + "alpha": 3, # - Power of the power-law momentum + # distribution used to generate GRF + "min": 0, # - Minimum value of vector magnitude + "max": 10 # - Maximum value of vector magnitude + } +} +``` \ No newline at end of file diff --git a/docs/source/sections/testing_strategy.rst b/docs/development.md similarity index 65% rename from docs/source/sections/testing_strategy.rst rename to docs/development.md index 23c294d7..817c8e5b 100644 --- a/docs/source/sections/testing_strategy.rst +++ b/docs/development.md @@ -1,7 +1,6 @@ -.. _testing_strategy: +# Development -Testing Strategy -================= +## Testing When updating any files within the MeshiPhi repository, tests must be run to ensure that the core functionality of the software remains unchanged. @@ -23,4 +22,12 @@ To avoid running slow tests: To run only slow tests: -`pytest -m slow` \ No newline at end of file +`pytest -m slow` + +## Documentation + +Documentation is built from the `docs/` directory using `mkdocs` and plugins. + +To install the docs dependencies, from the project root run `pip install --group docs` in your virtual environment. + +To serve the docs locally, run `mkdocs serve`. diff --git a/docs/examples.md b/docs/examples.md new file mode 100644 index 00000000..de8e8f56 --- /dev/null +++ b/docs/examples.md @@ -0,0 +1,64 @@ +# Examples + +Digital environment files (meshes) can be created using the MeshiPhi package, either through the +command line interface (CLI) or through the python terminal. This section will provide examples of how to create a digital +environment file using Python. + +## Creating the Digital Environment. 
+ +A configuration file is needed to initialise the `Mesh` object which forms the digital environment. This +configuration file is of the same format used in the [`create_mesh`](cli.md#create_mesh) CLI entry-point, and may either be loaded from a +*json* file or constructed within a python interpreter. + +Loading configuration information from a `json` file: + +```py +import json +with open('examples/environment_config/grf_example.config.json', 'r') as f: + config = json.load(f) +``` + +The digital environment `Mesh` object can then be initialised. This mesh object will be constructed using parameters in its +configuration file. This mesh object can be manipulated further, such as increasing its resolution through further +splitting, adding additional data sources or altering its configuration parameters using functions listed in +the [Methods - Mesh Construction](mesh_construction/index.md) section of the documentation. The digital environment `Mesh` object can then be cast to +a json object and saved to a file. + +```py +from meshiphi.mesh_generation.mesh_builder import MeshBuilder + +cg = MeshBuilder(config).build_environmental_mesh() + +mesh = cg.to_json() +``` + +The `Mesh` object can be visualised using the [GeoPlot](https://github.com/bas-amop/GeoPlot) package, also developed +by BAS. 
This package is not included in the distribution of MeshiPhi, but can be installed using the following command: + +```sh +pip install bas_geoplot +``` + +**GeoPlot** can be used to visualise the `Mesh` object using the following code in an IPython notebook or +any python interpreter: + +```py +import pandas as pd +from bas_geoplot.interactive import Map + +mesh = pd.DataFrame(mesh['cellboxes']) +map = Map(title="GRF Example") + +map.Maps(mesh, 'MeshGrid', predefined='cx') +map.Maps(mesh, 'SIC', predefined='SIC') +map.Maps(mesh, 'Elevation', predefined='Elev', show=False) +map.Vectors(mesh,'Currents', show=False, predefined='Currents') +map.Vectors(mesh, 'Winds', predefined='Winds', show=False) + +map.show() +``` + +The code above should produce a plot which shows the digital environment, including sea ice concentration, elevation, currents and wind. + +*The expected output of running bas_geoplot on the GRF example mesh provided:* +![](assets/figures/grf_example_mesh.png) diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..63e06285 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,4 @@ +{% + include-markdown "../README.md" + rewrite-relative-urls=true +%} diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 00000000..fa3aeb86 --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,71 @@ +# Installation + +In this section we outline the necessary steps for installing the MeshiPhi software package. MeshiPhi requires a +pre-existing installation of Python 3.8 or higher. + + +## Installing MeshiPhi + +MeshiPhi can be installed from one of the following two sources: + +from PyPI: `pip install MeshiPhi` + +from GitHub: + +```sh +git clone https://github.com/bas-amop/MeshiPhi.git +cd MeshiPhi +pip install . +``` + +## Installing GDAL (Optional) + +MeshiPhi has GDAL as an optional requirement. It is only used when exporting TIFF images, so if this is not useful to +you, we would recommend steering clear. 
It is not trivial and is a common source of problems. +With that said, below are instructions for various operating systems. + +### Windows + +!!! note + We assume a version of Windows 10 or higher, with a working version of Python 3.9 including pip installed. + +!!! note + We recommend installing MeshiPhi into a virtual environment. + + +```sh +pip install pipwin # pipwin is a package that allows for easy installation of windows binaries +pipwin install gdal +pipwin install fiona +``` + +### Linux/MacOS + +Ubuntu/Debian: + +```sh +sudo add-apt-repository ppa:ubuntugis/ppa +sudo apt-get update +sudo apt-get install gdal-bin libgdal-dev +export CPLUS_INCLUDE_PATH=/usr/include/gdal +export C_INCLUDE_PATH=/usr/include/gdal +pip install GDAL==$(gdal-config --version) +``` + +Fedora: + +```sh +sudo dnf update +sudo dnf install gdal gdal-devel +export CPLUS_INCLUDE_PATH=/usr/include/gdal +export C_INCLUDE_PATH=/usr/include/gdal +pip install GDAL==$(gdal-config --version) +``` + +MacOS (with HomeBrew): + +```sh +brew install gdal --HEAD +brew install gdal +pip install GDAL==$(gdal-config --version) +``` \ No newline at end of file diff --git a/docs/mesh_construction/classes.md b/docs/mesh_construction/classes.md new file mode 100644 index 00000000..0d456b4a --- /dev/null +++ b/docs/mesh_construction/classes.md @@ -0,0 +1,97 @@ +# Mesh Construction - Classes + +This section describes the main classes of the Mesh Construction module in detail. +For an overview of the abstractions behind the Mesh Construction module, see the +[Mesh Construction - Overview](index.md) section of the documentation. + +## MeshBuilder + +The `MeshBuilder` object is the main class of the Mesh Construction module. It is used to build the +`EnvironmentMesh` object from a collection of geospatial data. Features of the created `EnvironmentMesh` +can be set using a configuration file passed to the `MeshBuilder` object. 
For more information on the format +of the configuration file, see the [configuration - mesh construction](../config/mesh_construction.md) section of the documentation. + +::: meshiphi.mesh_generation.mesh_builder.MeshBuilder + options: + merge_init_into_class: true + members: + - build_environmental_mesh + - split_and_replace + - split_to_depth + - add_dataloader + +## EnvironmentMesh + +The `EnvironmentMesh` object is a collection of geospatial boundaries containing an aggregated representation +of the data contained within the boundaries (`AggregatedCellBox` objects). The `EnvironmentMesh` object is +created by the `MeshBuilder` object, though the object is mutable and can be updated after construction. + +::: meshiphi.mesh_generation.environment_mesh.EnvironmentMesh + options: + merge_init_into_class: true + members: + - load_from_json + - update_cellbox + - to_json + - to_geojson + - to_tif + - save + - merge_mesh + - split_and_replace + +## NeighbourGraph + +The `NeighbourGraph` object is used to store the connectivity information between the cells of the `EnvironmentMesh`. +The `NeighbourGraph` object is created by the `MeshBuilder` object and is encoded into the `EnvironmentMesh`. + +::: meshiphi.mesh_generation.neighbour_graph.NeighbourGraph + options: + members: + - initialise_neighbour_graph + - get_neighbour_case + - update_neighbours + +## CellBox + +The `CellBox` object is used to store the data contained within a geospatial boundary in the `MeshBuilder`. +The `CellBox` object is created by the `MeshBuilder` object and transformed into an `AggregatedCellBox` object +when the `MeshBuilder` returns the `EnvironmentMesh` object. + +::: meshiphi.mesh_generation.cellbox.CellBox + options: + merge_init_into_class: true + members: + - set_data_source + - should_split + - split + - set_parent + - aggregate + +## Metadata + +The `Metadata` object is used to store the metadata associated with a `CellBox` object within the `MeshBuilder`. 
This includes +associated DataLoaders, the depth of the ``CellBox`` within the `MeshBuilder`, and the parent `CellBox` of the `CellBox` among others. + +::: meshiphi.mesh_generation.metadata.Metadata + options: + merge_init_into_class: true + +## AggregatedCellBox + +An aggregated representation of the data contained within a geospatial boundary. The `AggregatedCellBox` object is created +by the `CellBox` object when the `MeshBuilder` returns the `EnvironmentalMesh`. + +::: meshiphi.mesh_generation.aggregated_cellbox.AggregatedCellBox + options: + merge_init_into_class: true + members: + - contains_point + - to_json + + + + + + + + diff --git a/docs/source/sections/Mesh_Construction/Mesh_construction_overview.rst b/docs/mesh_construction/index.md similarity index 50% rename from docs/source/sections/Mesh_Construction/Mesh_construction_overview.rst rename to docs/mesh_construction/index.md index d6a4bd82..aac36184 100644 --- a/docs/source/sections/Mesh_Construction/Mesh_construction_overview.rst +++ b/docs/mesh_construction/index.md @@ -1,59 +1,37 @@ -.. _mesh_construction_overview: - -******************************** -Methods - Mesh Construction -******************************** +# Methods - Mesh Construction Throughout this section we will outline an overview of the Environment Mesh Construction module, describe the main classes that composes the module and illustrate a use case for the Discrete Meshing of the environment. -Mesh Construction - Overview -############################## +## Mesh Construction - Overview + A general overview of the method can be seen below: -.. figure:: ../Figures/FlowDiagram_MeshGraph.png - :align: center - :width: 700 +![](../assets/figures/FlowDiagram_MeshGraph.png) +Overview figure of the Discrete Meshing from the multi-data input. - Overview figure of the Discrete Meshing from the multi-data input. 
+## Mesh Construction Design -Mesh Construction Design -############################## The below UML diagram describes how the Environment Mesh Construction module is designed. It depicts the classes of the module and how they interact with each other. -.. figure:: ../Figures/mesh-construct-UML.drawio.png - :align: center - :width: 1000 +![](../assets/figures/mesh-construct-UML.drawio.png) -Mesh Construction Use case -################################### +## Mesh Construction Use case + This sequence diagram illustrates a use case for the Discrete Meshing of the environment, where the module's client starts by initializing the MeshBuilder with a certain mesh configuration (see Input-Configuration section for more details about the configuration format) then calls build_environment_mesh method. -.. figure:: ../Figures/mesh-build-sequence-diagram.drawio.png - :align: center - :width: 1000 +![](../assets/figures/mesh-build-sequence-diagram.drawio.png) The following diagram depicts the sequence of events that take place inside build_environment_mesh method into details -.. figure:: ../Figures/build-env-mesh.drawio.png - :align: center - :width: 1000 +![](../assets/figures/build-env-mesh.drawio.png) -For a more in-depth explanation of the mesh construction methods, please refer to the :ref:`Mesh Construction - Classes` +For a more in-depth explanation of the mesh construction methods, please refer to the [Mesh Construction - Classes](classes.md) section. - - -Mesh Construction - Additional -################################ -.. toctree:: - :maxdepth: 1 - - ./Mesh_construction_classes - ./Mesh_validation diff --git a/docs/output.md b/docs/output.md new file mode 100644 index 00000000..2c07d951 --- /dev/null +++ b/docs/output.md @@ -0,0 +1,117 @@ +# Outputs - Data Types + +## The Mesh.json file + +Once a mesh has been constructed using MeshiPhi, it can then be exported as a json object and saved to a file. 
An example +of mesh construction and json object generation are as follows: + +```py +from meshiphi.mesh import Mesh + +with open('config.json', 'r') as f: + config = json.load(f) + +mesh = Mesh(config) +mesh_json = mesh.to_json() +``` + +!!! note + Examples and a description of the configuration files can be found in the [configuration - mesh construction](config/mesh_construction.md) section of this document. + + +The json object outputted by the Mesh consists of 3 sections: `config`, +`cellboxes` and `neighbour_graph`. + +```json +{ + "config": { + ... + }, + "cellboxes": [ + {...}, + ... + {...} + ], + "neighbour_graph": [ + "id_1": { + ... + }, + ... + "id_n": { + ... + } + ] +} +``` + +where the parts of the json object can be understood as follows: + +* `config` : The configuration file used to generate the Mesh. +* `cellboxes` : A list of json representations of CellBox objects that form the Mesh. +* `neighbour_graph` : A graph representation of the adjacency of CellBoxes within the Mesh. + +### `cellboxes` + +Each CellBox object within `cellboxes` in the outputted json object is of the following form: + +```json +{ + "id" (string): ..., + "geometry" (string): ..., + "cx" (float): ..., + "cy" (float): ..., + "dcx" (float): ..., + "dcy" (float): ..., + "value_1" (float): ..., + ... + "value_n" (float): ... +} +``` + +Where the values within the CellBox represent the following: + +* `id` : The index of the CellBox within the Mesh. +* `geometry` : The spatial boundaries of the CellBox. +* `cx` : The x-position of the centroid of the CellBox, given in degrees longitude. +* `cy` : The y-position of the centroid of the CellBox, given in degrees latitude. +* `dcx` : The x-distance from the edge of the CellBox to the centroid of the CellBox. Given in degrees longitude. +* `dcy` : The y-distance from the edge of the CellBox to the centroid of the CellBox. Given in degrees latitude. 
+![](assets/figures/cellbox_json.png) + + +### `neighbour_graph` + +For each CellBox in the `cellboxes` section of the json object, there will be a corresponding entry in the `neighbour_graph`. + +!!! note + Once the vehicle accessibility conditions have been applied to the json object, this may no longer be true as inaccessible CellBoxes will be removed from *neighbour_graph* but will remain in *cellboxes*. + +Each entry in the `neighbour_graph` is of the following form: + +```json +"<id>": { + "1": [...], + "2": [...], + "3": [...], + "4": [...], + "-1": [...], + "-2": [...], + "-3": [...], + "-4": [...] +} +``` + +where each of the values represents the following: + +* **`<id>`** : The id of a CellBox within `cellboxes` + * **1** : A list of id's of CellBoxes within `cellboxes` to the North-East of the CellBox specified by `id`. + * **2** : A list of id's of CellBoxes within `cellboxes` to the East of the CellBox specified by `id`. + * **3** : A list of id's of CellBoxes within `cellboxes` to the South-East of the CellBox specified by `id`. + * **4** : A list of id's of CellBoxes within `cellboxes` to the South of the CellBox specified by `id`. + * **-1** : A list of id's of CellBoxes within `cellboxes` to the South-West of the CellBox specified by `id`. + * **-2** : A list of id's of CellBoxes within `cellboxes` to the West of the CellBox specified by `id`. + * **-3** : A list of id's of CellBoxes within `cellboxes` to the North-West of the CellBox specified by `id`. + * **-4** : A list of id's of CellBoxes within `cellboxes` to the North of the CellBox specified by `id`. 
+ +![](assets/figures/neighbour_graph_json.png) diff --git a/docs/source/sections/Code_overview.rst b/docs/overview.md similarity index 70% rename from docs/source/sections/Code_overview.rst rename to docs/overview.md index ad48b815..8a78390f 100644 --- a/docs/source/sections/Code_overview.rst +++ b/docs/overview.md @@ -1,27 +1,25 @@ -********** -Background -********** +# Background -Code Structure -############## +## Code Structure + The aim of this manual is to provide the user with all the tools that they need to run the software for a set of examples. We also hope that the background information supplied for each section allows the user to understand the methods used throughout this package. The separate stages of the codebase can be broken down into: -1. :ref:`Dataloaders ` - Reading in different datasets of differing types. Throughout this section +1. [Dataloaders](dataloaders/overview.md) - Reading in different datasets of differing types. Throughout this section we will outline the form that the input datasets should take and useful tips for pre-processing your data. -2. :ref:`Mesh Construction ` - Generating a non-uniform mesh representation of the +2. [Mesh Construction](mesh_construction/index.md) - Generating a non-uniform mesh representation of the environmental conditions. In this section we outline the different Python classes that are used to construct a discretised representation of the user-defined datasets, giving a coding background to the dynamic splitting of the mesh to finer resolution in regions of spatially varying data. -Each stage of this process makes use of a configuration file, found in the :ref:`Configuration Overview` section of the -documentation and produces an output file, the form of which can be found in the :ref:`outputs` section. 
+Each stage of this process makes use of a configuration file, found in the [Configuration Overview](config/overview.md) section of the +documentation and produces an output file, the form of which can be found in the [outputs](output.md) section. In addition to the core functionality of the package we have also developed a set of plotting classes that allow the user -to generate both interactive maps and static figures of the Mesh outputs. These can be found in the :ref:`Mesh Plotting` +to generate both interactive maps and static figures of the Mesh outputs. These can be found in the [Mesh Plotting](plotting.md) section later in the manual. \ No newline at end of file diff --git a/docs/plotting.md b/docs/plotting.md new file mode 100644 index 00000000..baae68ca --- /dev/null +++ b/docs/plotting.md @@ -0,0 +1,7 @@ +# Mesh Plotting + +As well as the interactive plotting functionality provided by [GeoPlot](https://github.com/bas-amop/GeoPlot), +Meshiphi also contains a built-in plotting class `mesh_plotter` that allows the user to produce static plots of a +given environmental mesh. + +::: meshiphi.mesh_plotting.mesh_plotter diff --git a/docs/source/conf.py b/docs/source/conf.py deleted file mode 100644 index 338951c9..00000000 --- a/docs/source/conf.py +++ /dev/null @@ -1,66 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
-# - -import meshiphi - -# -- Project information ----------------------------------------------------- - -project = meshiphi.__name__ -copyright = meshiphi.__copyright__ -author = meshiphi.__author__ - -# The full version, including alpha/beta/rc tags -release = meshiphi.__version__ - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.coverage", - "sphinx.ext.napoleon", - # 'rinoh.frontend.sphinx', - "sphinx.ext.todo", - "sphinx.ext.autosectionlabel", - "sphinx_rtd_theme", - "sphinx_markdown_builder", -] - - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] - - -# # -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "sphinx_rtd_theme" # "agogo" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = [] - -# import sphinx_pdj_theme -# html_theme = 'sphinx_pdj_theme' -# html_theme_path = [sphinx_pdj_theme.get_html_theme_path()] diff --git a/docs/source/index.rst b/docs/source/index.rst deleted file mode 100644 index f71d83ab..00000000 --- a/docs/source/index.rst +++ /dev/null @@ -1,35 +0,0 @@ -Welcome to the MeshiPhi Manual Pages -====================================== - -MeshiPhi is a tool for the discretisation of environmental data with a non uniform resolution based on the variance of -the data. This software package has been developed by the **British Antarctic Survey** (BAS). It was initially designed -as part of a `route planning tool `_ for the BAS research vessel RRS Sir David -Attenborough, although it can be applied to any geospatial data. The software is written in Python and is open source. - -The package contains limited plotting functionality, which is described in the :ref:`Mesh Plotting` section. For -extended plotting functionality, we recommend using the GeoPlot package, which was also developed at BAS. This is -available from the following GitHub repository: `GeoPlot `_ - -For more information on the project, please visit the `AMOP website `_ -and follow our `GitHub repository `_. - - -.. note:: The development of this codebase is ongoing and not yet complete. - Please contact the developers for more information. - -Contents: - -.. 
toctree:: - :maxdepth: 2 - :numbered: - - ./sections/Installation - ./sections/Examples - ./sections/Command_line_interface - ./sections/Code_overview - ./sections/Configuration/Configuration_overview - ./sections/Outputs - ./sections/Dataloaders/overview - ./sections/Mesh_Construction/Mesh_construction_overview - ./sections/Plotting/mesh_plotting - ./sections/testing_strategy diff --git a/docs/source/sections/Command_line_interface.rst b/docs/source/sections/Command_line_interface.rst deleted file mode 100644 index 63b19407..00000000 --- a/docs/source/sections/Command_line_interface.rst +++ /dev/null @@ -1,212 +0,0 @@ -############################### -Command Line Interface -############################### - -The MeshiPhi package provides CLI entry points, used to build a digital environment from a heterogeneous collection of -source data. This digital environment file (mesh) can then be exported to a variety of file formats for use in other -systems, such as GIS software. The produced mesh file also interfaces directly with `PolarRoute `_, -BAS's route planning software, to provide optimal routes for a vehicle travelling through the mesh. - -^^^^^^^^^^^ -create_mesh -^^^^^^^^^^^ - -The *create_mesh* entry point builds a digital environment file from a collection of source data, which can then be saved -to a file for visualisation or use in other software. - -:: - - create_mesh - -positional arguments: - -:: - - config : A configuration file detailing how to build the digital environment. JSON parsable - -The format of the required ** file can be found in the :ref:`configuration - mesh construction` section of the documentation. -There are also example configuration files available in the directory :code:`examples/environment_config/grf_example.config.json` on GitHub. 
- -optional arguments: - -:: - - -v (verbose logging) - -o (set output location for mesh) - - -The format of the returned mesh.json file is explain in :ref:`the mesh.json file` section of the documentation. - - - -^^^^^^^^^^^ -export_mesh -^^^^^^^^^^^ -Once a mesh has been built using the :ref:`create_mesh` command, it can be exported other file types for -use in other systems (such as GIS software) using the the *export_mesh* command. - -:: - - export_mesh - -positional arguments: - -:: - - mesh : A digital environment file. - output_location : The location to save the exported mesh. - output_format : The format to export the mesh to. - - -supported output formats are: - * .json (default) [JSON] - * geo.json (collection of polygons for each cell in the mesh) [GEOJSON] - * .tif (rasterised mesh) [TIF] - * .png [PNG] - -optional arguments: - -:: - - -v : verbose logging - -o : output location - -format_conf: configuration file for output format (required for TIF export, optional for GEOJSON) - -an example of the format of the ** file required for .tif export is as follows: - -:: - - { - "data_name": "elevation", - "sampling_resolution": [ - 150, - 150 - ], - "projection": "3031", - "color_conf": "path to/color_conf.txt" - } - -where the variables are as follows: - * **data_name** : The name of the data to be exported. This is the name of the data layer in the mesh. - * **sampling_resolution** : The resolution of the exported mesh. This is a list of two values, the first being the x resolution and the second being the y resolution. - * **projection** : The projection of the exported mesh. This is a string of the EPSG code of the projection. - * **color_conf** : The path to the color configuration file. This is a text file containing the color scheme to be used when exporting the mesh. 
The format of this file is as follows: - -:: - - 0 240 250 160 - 30 230 220 170 - 60 220 220 220 - 100 250 250 250 - -The color_conf.txt contains 4 columns per line: the data_name value and the -corresponding red, green, blue value between 0 and 255. - -When using the *-format_conf* option for GEOJSON output the only variable required is the **data_name**. This specifies -which of the data layers you want to export as a single GEOJSON file. - -^^^^^^^^^^^^ -rebuild_mesh -^^^^^^^^^^^^ - -Once a mesh has been built using the :ref:`create_mesh` command the *rebuild_mesh* command allows a user to rebuild it based on the -original configs stored within the mesh file. This is primarily useful for debugging or to update old meshes produced with an older version -of the package. - -:: - - rebuild_mesh - -optional arguments: - -:: - - -v : verbose logging - -o : output location - - -^^^^^^^^^^^^^^ -merge_mesh -^^^^^^^^^^^^^^ - -When multiple compatible meshes have been created using the :ref:`create_mesh` command, they can be merged together using the :ref:`merge_mesh` command. -This will combine the meshes into a single mesh file, replacing cellboxes in mesh1 with cellboxes in mesh2 where they overlap. - - -:: - - merge_mesh - -positional arguments: - -:: - - mesh1 : A digital environment file. - mesh2 : A digital environment file. - -optional arguments: - -:: - - -v : verbose logging - -o : output location - -d, --directory : Flag indicating the mesh files to be merged are in a directory, not an individual file - - -^^^^^^^^^^^^^^^^^^^^^ -plot_mesh (GeoPlot) -^^^^^^^^^^^^^^^^^^^^^ -Meshes produced at any stage in the route planning process can be visualised using the GeoPlot -library found at the relevant `GitHub page `_. Meshes and routes can also be -plotted in other GIS software such as QGIS or ArcGIS by exporting the mesh to a common format such as .geojson or .tif -using the :ref:`export_mesh` command. 
- -:: - - plot_mesh - -optional arguments: - -:: - - -v : verbose logging - -o : output location - - -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -meshiphi_test (for developers) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Compares the current working branch of Meshiphi to a different git branch, and runs relevant unit and regression tests -depending on what files are different between the branches. This command should be run to ensure consistent functionality before -creating a pull request. - -Requires git to be installed, and for Meshiphi to be installed with pip in developer mode from a local git repo. - -:file:`git clone git@github.com:bas-amop/MeshiPhi.git /path/to/meshiphi` downloads Meshiphi into :file:`/path/to/meshiphi` - -:file:`pip install -e /path/to/meshiphi` installs the local copy of Meshiphi in developer mode. - -:: - - meshiphi_test - -positional arguments: - -:: - - from_branch : Git branch that you want to test (optional, defaults to current branch) - into_branch : Git branch that you want to compare against - -optional arguments: - -:: - - -r : run regression tests only - -u : run unit tests only - -s : save mesh of failed regression tests to `./pytest_meshiphi`. This is to avoid having to recompute meshes upon pytest failure - -p : plots difference between newly generated mesh and the reference mesh for easier diagnosis. Only computes on pytest failure - - -If -s or -p flag provided, a folder :file:`pytest_meshiphi` will be created in your current working directory, populated with -the failing tests to aid debugging and avoid having to regenerate them manually. 
diff --git a/docs/source/sections/Configuration/Mesh_construction_config.rst b/docs/source/sections/Configuration/Mesh_construction_config.rst deleted file mode 100644 index b9335ba0..00000000 --- a/docs/source/sections/Configuration/Mesh_construction_config.rst +++ /dev/null @@ -1,396 +0,0 @@ -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Configuration - Mesh Construction -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Below is a full configuration file for building an environmental mesh using synthetic data generated from Gaussian -Random Fields (GRFs). This configuration file generates the fields 'SIC', 'elevation', 'thickness', 'density', 'uC, vC' -(currents) and 'u10, v10' (winds). The full configuration file is available in the file location -:code:`examples/environment_config/grf_example.config.json` on GitHub. Other example configuration files are also -available at this location, including configuration files which build meshes using real datasets. - - -.. code-block:: json - - { - "region": { - "lat_min": 0, - "lat_max": 10, - "long_min": 0, - "long_max": 10, - "start_time": "2017-02-01", - "end_time": "2017-02-04", - "cell_width": 10, - "cell_height": 10 - }, - "data_sources": [ - { - "loader": "scalar_grf", - "params": { - "data_name": "SIC", - "min": 0, - "max": 100, - "seed": 16, - "offset": 5, - "splitting_conditions": [ - { - "SIC": { - "threshold": 75, - "upper_bound": 1.0, - "lower_bound": 0.0 - } - } - ], - "dataloader_name": "scalar_grf", - "downsample_factors": [ - 1, - 1 - ], - "aggregate_type": "MEAN", - "min_dp": 5, - "in_proj": "EPSG:4326", - "out_proj": "EPSG:4326", - "x_col": "lat", - "y_col": "long", - "size": 512, - "alpha": 3, - "binary": false, - "threshold": [ - 0, - 1 - ], - "multiplier": 1 - } - }, - { - "loader": "scalar_grf", - "params": { - "data_name": "elevation", - "min": -100, - "max": 50, - "seed": 30, - "splitting_conditions": [ - { - "elevation": { - "threshold": -10, - "upper_bound": 1.0, - "lower_bound": 0.0 - 
} - } - ], - "dataloader_name": "scalar_grf", - "downsample_factors": [ - 1, - 1 - ], - "aggregate_type": "MEAN", - "min_dp": 5, - "in_proj": "EPSG:4326", - "out_proj": "EPSG:4326", - "x_col": "lat", - "y_col": "long", - "size": 512, - "alpha": 3, - "binary": false, - "threshold": [ - 0, - 1 - ], - "multiplier": 1, - "offset": 0 - } - }, - { - "loader": "scalar_grf", - "params": { - "data_name": "thickness", - "min": 0.65, - "max": 1.4, - "seed": 44, - "dataloader_name": "scalar_grf", - "downsample_factors": [ - 1, - 1 - ], - "aggregate_type": "MEAN", - "min_dp": 5, - "in_proj": "EPSG:4326", - "out_proj": "EPSG:4326", - "x_col": "lat", - "y_col": "long", - "size": 512, - "alpha": 3, - "binary": false, - "threshold": [ - 0, - 1 - ], - "multiplier": 1, - "offset": 0 - } - }, - { - "loader": "scalar_grf", - "params": { - "data_name": "density", - "min": 850, - "max": 1000, - "seed": 40, - "dataloader_name": "scalar_grf", - "downsample_factors": [ - 1, - 1 - ], - "aggregate_type": "MEAN", - "min_dp": 5, - "in_proj": "EPSG:4326", - "out_proj": "EPSG:4326", - "x_col": "lat", - "y_col": "long", - "size": 512, - "alpha": 3, - "binary": false, - "threshold": [ - 0, - 1 - ], - "multiplier": 1, - "offset": 0 - } - }, - { - "loader": "vector_grf", - "params": { - "data_name": "uC,vC", - "min": 0, - "max": 1, - "seed": 21, - "dataloader_name": "vector_grf", - "downsample_factors": [ - 1, - 1 - ], - "aggregate_type": "MEAN", - "min_dp": 5, - "in_proj": "EPSG:4326", - "out_proj": "EPSG:4326", - "x_col": "lat", - "y_col": "long", - "size": 512, - "alpha": 3, - "vec_x": "uC", - "vec_y": "vC" - } - }, - { - "loader": "vector_grf", - "params": { - "data_name": "u10,v10", - "min": 0, - "max": 1, - "seed": 21, - "dataloader_name": "vector_grf", - "downsample_factors": [ - 1, - 1 - ], - "aggregate_type": "MEAN", - "min_dp": 5, - "in_proj": "EPSG:4326", - "out_proj": "EPSG:4326", - "x_col": "lat", - "y_col": "long", - "size": 512, - "alpha": 3, - "vec_x": "uC", - "vec_y": "vC" - } - } - 
], - "splitting": { - "split_depth": 6, - "minimum_datapoints": 5 - } - } - -The configuration file used for mesh construction contains information required to build a discretised model of the environment. -Information here dictates the region in which the mesh is constructed, the data contained within the mesh and how the -mesh is split to a non-uniform resolution. The configuration file used to generate a mesh is stored in the output mesh json -in a section titled 'mesh_info'. - -The mesh configuration file contains three primary sections: - -################ -Region -################ -The region section gives detailed information for the construction of the Discrete Mesh. The main definitions are the -bounding region and temporal portion of interest (:code:`long_min`, :code:`lat_min`, :code:`long_max`, :code:`lat_max`, :code:`start_time`, :code:`end_time`), but -also the starting shape of the spatial grid cell boxes (:code:`cell_width`, :code:`cell_height`) is defined before splitting is -applied. Further detail on each parameter is given below: - -:: - - "region": { - "lat_min": 0, - "lat_max": 10, - "long_min": 0, - "long_max": 10, - "start_time": "2017-02-01", - "end_time": "2017-02-04", - "cell_width": 10, - "cell_height": 10 - } - -where the variables are as follows: - -* **long_min** *(float, degrees)* : Minimum Longitude Edge of the Mesh -* **long_max** *(float, degrees)* : Maximum Longitude Edge of the Mesh -* **lat_min** *(float, degrees)* : Minimum Latitude Edge of the Mesh -* **lat_max** *(float, degrees)* : Maximum Latitude Edge of the Mesh -* **start_time** *(string, 'YYYY-mm-dd')* : Start Datetime of Time averaging -* **end_time** *(string, 'YYYY-mm-dd')* : End Datetime of Time averaging -* **cell_width** *(float, degrees)* : Initial Cell Box Width prior to splitting -* **cell_height** *(float, degrees)* : Initial Cell Box Height prior to splitting - -.. 
note:: - Variables **start_time** and **end_time** also support reference to system time using - the keyword **TODAY** *e.g.* - - "start_time": "TODAY", "end_time": "TODAY + 5" - - "start_time": "TODAY - 3", "end_time": "TODAY" - -############ -Data Sources -############ - -The 'data_sources' section of the configuration file defines which information will be added to the -mesh when constructed. Each item in the list of data sources represents a single dataset to be added -to the mesh. - -:: - - "data_sources": [ - { - "loader": "scalar_grf", - "params": { - "data_name": "SIC", - "min": 0, - "max": 100, - "seed": 16, - "offset": 5, - "splitting_conditions": [ - { - "SIC": { - "threshold": 75, - "upper_bound": 1.0, - "lower_bound": 0.0 - } - } - ], - "dataloader_name": "scalar_grf", - "downsample_factors": [ - 1, - 1 - ], - "aggregate_type": "MEAN", - "min_dp": 5, - "in_proj": "EPSG:4326", - "out_proj": "EPSG:4326", - "x_col": "lat", - "y_col": "long", - "size": 512, - "alpha": 3, - "binary": false, - "threshold": [ - 0, - 1 - ], - "multiplier": 1 - } - }, - ... other data_sources - ] - - -where the variables are as follows: - - -* **loader** *(string)* : The name of the data loader to be used to add this data source to the mesh - see the :ref:`abstractScalarDataloader doc page` for further information about the available data loaders. -* **params** *(dict)* : A dictionary containing optional parameters which may be required by the specified data loader in 'loader'. These parameters include the following: - - * **value_fill_types** *(string)* : Determines the actions taken if a cellbox is generated with no data. The possible values are either parent (which implies assigning the value of the parent cellbox), zero or nan. - * **aggregate_type** *(string)* : Specifies how the data within a cellbox will be aggregated. By default aggregation takes place by calculating the mean of all data points within the CellBoxes bounds.
*aggregate_type* allows this default to be changed to another aggregate function (e.g. MIN, MAX, COUNT). - * **[scalar] splitting_conditions** *(list)* : The conditions which determine if a cellbox should be split based on a scalar dataset. - * **threshold** *(float)* : The threshold above or below which CellBoxes will be sub-divided to separate the datapoints into homogeneous cells. - * **upper_bound** *(float)* : A percentage normalised between 0 and 1. A CellBox is deemed homogeneous if greater than this percentage of data points are above the given threshold. - * **lower_bound** *(float)* : A percentage normalised between 0 and 1. A CellBox is deemed homogeneous if less than this percentage of data points are above the given threshold. - * **[vector] splitting_conditions** *(list)* : The conditions which determine if a cellbox should be split based on a vector dataset. - * **curl** *(float)* : The threshold value above which a cellbox will split. Is calculated as the maximum value of **Curl(F)** within a cellbox (where **F** is the vector field). - -.. note:: - Splitting conditions are applied in the order they are specified in the configuration file. - - -######### -Splitting -######### - -Non-uniform mesh refinement is done by selectively sub-dividing cells. Cell -sub-division is performed whenever a cell (of any size) is determined to be -inhomogeneous with respect to a specific characteristic of interest such as -SIC or ocean depth (this characteristic is defined as a splitting condition -inside the data source's params as illustrated above). - -In the figure below, a graphical representation of the splitting -decision making process is shown. In this, the blue histogram represents an -arbitrary dataset, the orange histogram represents the values in the dataset -that are greater than the threshold (and denoted 'A' in the formulae), the -black line is the threshold value, 'UB' is the upper bound, and 'LB' is the -lower bound.
To be specific, this is a probability distribution, and hence the -area under the orange curve 'A' is a decimal fraction of the total dataset -(which would have an area of 1). - -.. _splitting_fig: -.. figure:: ../Figures/splitting_conditions.png - :align: center - :width: 700 - - *Plot showing how cellbox homogeneity is decided* - -* If the orange area :code:`A <= LB`, then the homogeneity condition is :code:`CLR`. -* If the orange area :code:`A >= UB`, then the homogeneity condition is :code:`HOM`. -* If the orange area :code:`LB < A < UB`, then the homogeneity condition is :code:`HET`. - -:code:`CLR`, :code:`HOM`, and :code:`HET` are used to determine if a cellbox -should be split or not. There is also a fourth homogeneity condition :code:`MIN` -which is only triggered when the number of datapoints within the cellbox is lower -than the minimum_datapoints specified in the config. The values are checked in this order: - -#. :code:`MIN` - Do not split the cellbox -#. :code:`CLR` - Do not split the cellbox, but allow splitting if other datasets return :code:`HET` -#. :code:`HOM` - Do not split the cellbox -#. :code:`HET` - Split the cellbox - -In the extreme case where :code:`UB = 1` and :code:`LB = 0`, the cellbox will -always split if there are any datapoints above or below the UB/LB respectively. -Imagining a plot similar to the figure above, - -* If the histogram is entirely blue, :code:`return 'CLR'` -* If the histogram is entirely orange, :code:`return 'HOM'` -* If there's both colours, :code:`return 'HET'` - -The splitting section of the Configuration file defines the splitting parameters that are *common* across all the data sources and determines how the CellBoxes that form the -Mesh will be sub-divided based on the homogeneity of the data points contained within to form a mesh -of non-uniform spatial resolution.
-:: - - "splitting": { - "split_depth":6, - "minimum_datapoints":5 - } - -where the variables are as follows: - -* **split_depth** *(float)* : The number of times the MeshBuilder will sub-divide each initial cellbox (subject to satisfying the splitting conditions of each data source) -* **minimum_datapoints** *(float)* : The minimum number of datapoints a cellbox must contain for each value type to be able to split diff --git a/docs/source/sections/Dataloaders/AddingDataloaders.rst b/docs/source/sections/Dataloaders/AddingDataloaders.rst deleted file mode 100644 index 568cd652..00000000 --- a/docs/source/sections/Dataloaders/AddingDataloaders.rst +++ /dev/null @@ -1,97 +0,0 @@ -.. _adding-dataloaders: - -Adding New Dataloaders -============================ - -Adding to the repository ------------------------- - -Each dataloader is to be implemented as a separate object for the environmental mesh to interface with. -The general workflow for creating a new dataloader is as follows: - -#. Choose an appropriate dataloader type (see :ref:`Dataloader Types`). -#. Create a new file under :code:`meshiphi.DataLoaders/{dataloader-type}` with an appropriate name. -#. Create :code:`import_data()` and (optionally) :code:`add_default_params()` methods. Examples of how to do this are shown on the :ref:`abstractScalar` and :ref:`abstractVector` pages. -#. Add a new entry to the dataloader factory object, within :code:`meshiphi.Dataloaders/Factory.py`. Instructions on how to do so are shown in :ref:`dataloader-factory` - -After performing these actions, the dataloader should be ready to go. It is useful for debugging purposes -to create the dataloader object from within :code:`meshiphi.Dataloaders/Factory.py` (e.g. within -:code:`if __name__=='__main__':` ) and test its functionality before deploying it. 
- - - -Adding within IPython Notebooks -------------------------------- - -If you do not wish to modify the repo to add a dataloader, you may add one into the mesh by calling the -:code:`add_dataloader()` method of :ref:`MeshBuilder`. - -An example of how to do this is detailed below. Assuming you're working out of a Jupyter notebook, the -basic steps would be to - -#. Create a dataloader - :: - - # Import the abstract dataloader as the base class - from meshiphi.dataloaders.scalar.abstract_scalar import ScalarDataLoader - - # Set up dataloader in the same way as the existing dataloaders - class MyDataLoader(ScalarDataLoader): - # Only user defined function required - def import_data(self, bounds): - # Read in data - if len(self.files) == 1: data = xr.open_dataset(self.files[0]) - else: data = xr.open_mfdataset(self.files) - # Trim data to boundary - data = self.trim_datapoints(bounds, data=data) - - return data - -#. Create a dictionary of parameters to initialise the dataloader - :: - - # Params formatted same way as dataloaders in config - params = { - 'files': [ - 'PATH_TO_FILE_1', - 'PATH_TO_FILE_2', - ... # Populate with as many files as you need - ], - 'data_name': 'my_data', - 'splitting_conditions':[ - { - 'my_data':{ - 'threshold': 0.5, - 'upper_bound': 0.9, - 'lower_bound': 0.1 - } - } - ] - } - -#. Initialise an Environmental Mesh - :: - - import json - from meshiphi import MeshBuilder - - # Config to initialise mesh from - with open('config.json', 'r') as fp: - config = json.load(fp) - - # Build a mesh from the config - mesh_builder = MeshBuilder(config) - env_mesh = mesh_builder.build_environmental_mesh() - -#.
Add dataloader to mesh - :: - - # Set up bounds of data in dataloader - from meshiphi import Boundary - bounds = Boundary.from_json(config) - - # Add dataloader to mesh builder and regenerate mesh - modified_builder = mesh_builder.add_dataloader(MyDataLoader, params, bounds) - modified_mesh = modified_builder.build_environmental_mesh() - - diff --git a/docs/source/sections/Dataloaders/DataLoaderInterface.rst b/docs/source/sections/Dataloaders/DataLoaderInterface.rst deleted file mode 100644 index 72aefc17..00000000 --- a/docs/source/sections/Dataloaders/DataLoaderInterface.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. _dataloader-interface: - -******************** -Dataloader Interface -******************** - -Shows how the mesh generation code may interact with the dataloaders. In operation, -only get_hom_condition() and get_value() are needed realistically. Other methods are -implemented in the :ref:`abstractScalar` and -:ref:`abstractVector` dataloaders. - -.. automodule:: meshiphi.dataloaders.dataloader_interface - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/Factory.rst b/docs/source/sections/Dataloaders/Factory.rst deleted file mode 100644 index f32de951..00000000 --- a/docs/source/sections/Dataloaders/Factory.rst +++ /dev/null @@ -1,76 +0,0 @@ -.. _dataloader-factory: - -****************** -Dataloader Factory -****************** - -The dataloader factory produces dataloader objects based off of parameter -inputs provided in the config file. The parameters needed in the config are -defined in the :code:`get_dataloader()` method of the factory. At the very -least, a name must be provided to select the dataloader from all those that -are available. - - - -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Adding New Dataloader to Factory -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Two actions must be performed to add a new dataloader to the Factory object.
-Optionally, a third may be added if you want to add a new default value for -a parameter the dataloader requires. The actions are: - -#. Import the dataloader -#. Add an entry to the :code:`dataloader_requirements` dictionary - -^^^^^^^ -Example -^^^^^^^ -In this example, a new scalar dataloader `myScalarDataloader` has been created, and -is located at :code:`meshiphi.Dataloaders/Scalar/myScalarDataloader.py`. - -The only parameter required by this dataloader is a file to read data from. 'files' -is passed as a mandatory parameter, as 'file' and 'folder' both get translated into -a list of files, and stored in params under the key 'files':: - - # Add new import statement for Factory to read - from meshiphi.Dataloaders.Scalar.myScalarDataloader import myScalarDataloader - - ... - - class DataLoaderFactory: - ... - def get_dataloader(self, name, bounds, params, min_dp=5): - ... - dataloader_requirements = { - ... - # Add new dataloaders - 'myscalar': (myScalarDataloader, ['files']) - ... - ... - ... - - -To call this dataloader, add an entry in the :code:`config.json` -file used to generate the mesh. Alternatively, add a folder, or a list of -individual files:: - - { - "loader": "myscalar", - "params": { - "file": "PATH_TO_DATA_FILE" # For a single file - "folder": "PATH_TO_FOLDER" # For a folder, must have trailing '/' - "files":[ # For a list of individual files - "PATH_TO_FILE_1", - "PATH_TO_FILE_2", - ... - ] - } - } - -^^^^^^^^^^^^^^^^^^^^^^^^^ -Dataloader Factory Object -^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. automodule:: meshiphi.dataloaders.factory - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/lut/abstractLUT.rst b/docs/source/sections/Dataloaders/lut/abstractLUT.rst deleted file mode 100644 index fdc79794..00000000 --- a/docs/source/sections/Dataloaders/lut/abstractLUT.rst +++ /dev/null @@ -1,9 +0,0 @@ -.. 
_abstract-lut-dataloader: - -********************************* -Abstract Look Up Table Dataloader -********************************* - -.. automodule:: meshiphi.dataloaders.lut.abstract_lut - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/lut/implemented/Density.rst b/docs/source/sections/Dataloaders/lut/implemented/Density.rst deleted file mode 100644 index 28de1191..00000000 --- a/docs/source/sections/Dataloaders/lut/implemented/Density.rst +++ /dev/null @@ -1,15 +0,0 @@ -****************** -Density Dataloader -****************** - -Density values were taken from the paper 'Thickness distribution of Antarctic sea ice' -(Worby, A.P. et al.). This paper took a density model from the paper 'Structure, principal -properties and strength of Antarctic sea ice' (Buynitskiy, V.K.). - -Name in config: :code:`'density'` - -Data is generated using the values from this paper, and so no data file is available for download. - -.. automodule:: meshiphi.dataloaders.lut.density - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/lut/implemented/LutCSV.rst b/docs/source/sections/Dataloaders/lut/implemented/LutCSV.rst deleted file mode 100644 index 6c2b8b12..00000000 --- a/docs/source/sections/Dataloaders/lut/implemented/LutCSV.rst +++ /dev/null @@ -1,16 +0,0 @@ -****************** -LUT CSV Dataloader -****************** - -The scalar CSV dataloader is designed to take any `.csv` file and cast -it into a data source for mesh construction. It was primarily used in testing -for loading dummy data to test performance. As such, there is no data source -for this dataloader. The CSV must have two columns: 'geometry' and 'data_name'. -'geometry' must have that title, and is a shapely wkt string. data_name can have -any name, and is just the value that is associated with the polygon. - -Name in config: :code:`'lut_csv'` - -.. 
automodule:: meshiphi.dataloaders.lut.lut_csv - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/lut/implemented/LutGeoJSON.rst b/docs/source/sections/Dataloaders/lut/implemented/LutGeoJSON.rst deleted file mode 100644 index 1c0805f3..00000000 --- a/docs/source/sections/Dataloaders/lut/implemented/LutGeoJSON.rst +++ /dev/null @@ -1,15 +0,0 @@ -********************** -LUT GeoJSON Dataloader -********************** - -The LUT GeoJSON dataloader is designed to take any GeoJSON file and cast -it into a data source for mesh construction. It was primarily used in testing -for loading dummy data to test performance. When using this dataloader, a value -should be provided in the mesh config file that specifies the value and data_name -that the polygons are assigned. The keyword in the config params is 'value'. - -Name in config: :code:`'lut_geojson'` - -.. automodule:: meshiphi.dataloaders.lut.lut_geojson - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/lut/implemented/Scotland_NCMPA.rst b/docs/source/sections/Dataloaders/lut/implemented/Scotland_NCMPA.rst deleted file mode 100644 index 39677c5c..00000000 --- a/docs/source/sections/Dataloaders/lut/implemented/Scotland_NCMPA.rst +++ /dev/null @@ -1,14 +0,0 @@ -************************* -Scotland NCMPA Dataloader -************************* - -GeoJSON files are provided by the Scottish government for Nature Conservation Marine Protected Areas. - -Name in config: :code:`'scotland_ncmpa'` - -Data can be downloaded from `here `_ - - -..
automodule:: meshiphi.dataloaders.lut.scotland_ncmpa - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/lut/implemented/Thickness.rst b/docs/source/sections/Dataloaders/lut/implemented/Thickness.rst deleted file mode 100644 index 8d0db254..00000000 --- a/docs/source/sections/Dataloaders/lut/implemented/Thickness.rst +++ /dev/null @@ -1,15 +0,0 @@ -******************** -Thickness Dataloader -******************** - -Thickness values were taken from the paper 'Thickness distribution of Antarctic sea ice' -(Worby, A.P. et al.). - -Name in config: :code:`'thickness'` - -Data is generated using the values from this paper, and so no -data file is available for download. - -.. automodule:: meshiphi.dataloaders.lut.thickness - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/lut/index.rst b/docs/source/sections/Dataloaders/lut/index.rst deleted file mode 100644 index 88c86c3d..00000000 --- a/docs/source/sections/Dataloaders/lut/index.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _abstract-lut-dataloader-index: - -*************** -LUT Dataloaders -*************** - - -^^^^^^^^^^^^^^^^^^^^^^^ -Abstract LUT Base Class -^^^^^^^^^^^^^^^^^^^^^^^ -.. toctree:: - :maxdepth: 1 - :glob: - - ./abstractLUT - -The Abstract Base Class of the Look Up Table dataloaders holds most of the -functionality that would be needed to manipulate the data to work -with the mesh. When creating a new dataloader, the user must define -how to open the data files, and what methods are required to manipulate -the data into a standard format. More details are provided on the -:ref:`abstractVector doc page` - - -^^^^^^^^^^^^^^^^^^^^^^^ -LUT Dataloader Examples -^^^^^^^^^^^^^^^^^^^^^^^ -Creating a LUT dataloader is almost identical to creating a -:ref:`scalar dataloader`. 
The key differences -are that the `LUTDataLoader` abstract base class must be used, and -regions are defined by Shapely polygons. Data is imported and saved as -GeoPandas dataframes, holding a polygon and an associated value. - -^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Implemented LUT Dataloaders -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. toctree:: - :maxdepth: 1 - :glob: - - ./implemented/* \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/scalar/abstractScalar.rst b/docs/source/sections/Dataloaders/scalar/abstractScalar.rst deleted file mode 100644 index d9306d5a..00000000 --- a/docs/source/sections/Dataloaders/scalar/abstractScalar.rst +++ /dev/null @@ -1,9 +0,0 @@ -.. _abstract-scalar-dataloader: - -************************** -Abstract Scalar Dataloader -************************** - -.. automodule:: meshiphi.dataloaders.scalar.abstract_scalar - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/scalar/implemented/AMSR.rst b/docs/source/sections/Dataloaders/scalar/implemented/AMSR.rst deleted file mode 100644 index a4dfb701..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/AMSR.rst +++ /dev/null @@ -1,22 +0,0 @@ -*************** -AMSR Dataloader -*************** - -The AMSR (Advanced Microwave Scanning Radiometer) dataset is a publicly -available product that provides Sea Ice Concentration scans of the Earth's oceans. -It is produced by researchers at the University of Bremen. - -The AMSR dataloader is currently the only 'standalone' dataloader, in that it -is defined independently of the abstract base class. This is due to issues -with :code:`pandas` calculating mean values differently depending on how the -data is loaded. This caused issues with the regression tests passing. -This issue will be rectified soon by updating the regression tests. - -Name in config: :code:`'amsr'` - -Data can be downloaded from `here `_ - - -..
automodule:: meshiphi.dataloaders.scalar.amsr - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/scalar/implemented/BSOSEDepth.rst b/docs/source/sections/Dataloaders/scalar/implemented/BSOSEDepth.rst deleted file mode 100644 index 6c03eef5..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/BSOSEDepth.rst +++ /dev/null @@ -1,24 +0,0 @@ -********************** -BSOSE Depth Dataloader -********************** - -B-SOSE (Biogeochemical Southern Ocean State Estimate solution) provide a publicly available dataset that -hosts (amongst other products) sea ice concentration (SIC) of the southern ocean. Their SIC product provides -a 'depth' value, which this dataloader ingests. -BSOSE is an extension of the SOSE project led by Mazloff at the Scripps Institution of Oceanography. - -From their website: - The Southern Ocean State Estimate (SOSE) is a model-generated best fit to Southern Ocean - observations. As such, it provides a quantitatively useful climatology of the mean-state - of the Southern Ocean. - -Name in config: :code:`'bsose_depth'` - -Data can be downloaded from `here `_ - -Note: This dataloader may not work "as is" for new data downloaded, it has been internally collated into -a more easily ingestable format. - -.. 
automodule:: meshiphi.dataloaders.scalar.bsose_depth - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/scalar/implemented/BSOSESeaIce.rst b/docs/source/sections/Dataloaders/scalar/implemented/BSOSESeaIce.rst deleted file mode 100644 index 63d56f22..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/BSOSESeaIce.rst +++ /dev/null @@ -1,24 +0,0 @@ -************************ -BSOSE Sea Ice Dataloader -************************ - -B-SOSE (Biogeochemical Southern Ocean State Estimate solution) provide a publicly available dataset that -hosts (amongst other products) sea ice concentration of the southern ocean. It is an extension of the -SOSE project led by Mazloff at the Scripps Institution of Oceanography. - -From their website: - The Southern Ocean State Estimate (SOSE) is a model-generated best fit to Southern Ocean - observations. As such, it provides a quantitatively useful climatology of the mean-state - of the Southern Ocean. - -Name in config: :code:`'bsose_sic'` - -Data can be downloaded from `here `_ - -Note: This dataloader may not work as is for new data downloaded, it has been internally collated into -a more easily ingestable format. - - -.. automodule:: meshiphi.dataloaders.scalar.bsose_sea_ice - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/scalar/implemented/BalticSeaIce.rst b/docs/source/sections/Dataloaders/scalar/implemented/BalticSeaIce.rst deleted file mode 100644 index 2fea523a..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/BalticSeaIce.rst +++ /dev/null @@ -1,18 +0,0 @@ -************************* -Baltic Sea Ice Dataloader -************************* - -Baltic sea ice concentration values are provided by the Finnish Meteorological Institute (FMI). -From their webpage: - - The operational sea ice service at FMI provides ice parameters over the Baltic Sea. 
- The parameters are based on ice chart produced on daily basis during the - Baltic Sea ice season and show the ice concentration in a 1 km grid. - -Name in config: :code:`'baltic_sic'` - -Data can be downloaded from `here `_ - -.. automodule:: meshiphi.dataloaders.scalar.baltic_sea_ice - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/scalar/implemented/BinaryGRF.rst b/docs/source/sections/Dataloaders/scalar/implemented/BinaryGRF.rst deleted file mode 100644 index 7efa3d27..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/BinaryGRF.rst +++ /dev/null @@ -1,35 +0,0 @@ -.. _binary-grf: - -********************* -Binary GRF Dataloader -********************* - -The binary GRF dataloader is the same as the :ref:`Scalar GRF` -The only difference is that instead of returning a dataframe that consists -of values between the min/max set in the config, this dataframe will contain -only True/False. It is useful for generating land masks. - -.. code-block:: - :caption: Default parameters for binary/mask GRF dataloader - - { - "loader": "binary_grf", - "params":{ - "data_name": "data", # - Name of the data column - "seed": None, # - Seed for random number generator. Must - # be int or None. None sets a random seed - "size": 512, # - Number of datapoints per lat/long axis - "alpha": 3, # - Power of the power-law momentum - # distribution used to generate GRF - "min": 0, # - Minimum value of GRF - "max": 1, # - Maximum value of GRF - "binary": True, # - Flag specifying this GRF is a binary mask - "threshold": 0.5 # - Value around which mask values are set. 
- # Below this, values are set to False - # Above this, values are set to True - } - } - -Name in config: :code:`'binary_grf'` - -See the :ref:`Scalar GRF page` for documentation on the dataloader diff --git a/docs/source/sections/Dataloaders/scalar/implemented/ECMWFSigWaveHeight.rst b/docs/source/sections/Dataloaders/scalar/implemented/ECMWFSigWaveHeight.rst deleted file mode 100644 index 4ee4a2c1..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/ECMWFSigWaveHeight.rst +++ /dev/null @@ -1,23 +0,0 @@ -***************************** -ECMWFSigWaveHeight Dataloader -***************************** - -The ECMWF (European Centre for Medium-Range Weather Forecasts) are both a -research institute and a 24/7 operational service, producing global numerical -weather predictions and other data for their Member and Co-operating States -and the broader community. The Centre has one of the largest supercomputer -facilities and meteorological data archives in the world. Other strategic -activities include delivering advanced training and assisting the WMO in -implementing its programmes. -(description taken from `here `_) - -Name in config: :code:`'ecmwf_sig_wave_height'` - -Data can be downloaded from `this link `_ - -This dataloader is for the grib2 files. - - -.. automodule:: meshiphi.dataloaders.scalar.ecmwf_sig_wave_height - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/scalar/implemented/ERA5MaxWaveHeight.rst b/docs/source/sections/Dataloaders/scalar/implemented/ERA5MaxWaveHeight.rst deleted file mode 100644 index 09790240..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/ERA5MaxWaveHeight.rst +++ /dev/null @@ -1,24 +0,0 @@ -*********************************** -ERA5 Maximum Wave Height Dataloader -*********************************** - -ERA5 is a family of data products produced by the European Centre for Medium-Range Weather Forecasts (ECMWF). 
-It is the fifth generation ECMWF atmospheric reanalysis of the global climate covering the period from January 1950 to present. - -From their website: - - ERA5 provides hourly estimates of a large number of atmospheric, - land and oceanic climate variables. The data cover the Earth on a - 30km grid and resolve the atmosphere using 137 levels from the - surface up to a height of 80km. ERA5 includes information about - uncertainties for all variables at reduced spatial and temporal resolutions. - -Name in config: :code:`'era5_max_wave_height'` - -Instructions for how to download their data products are -available `here `_ - - -.. automodule:: meshiphi.dataloaders.scalar.era5_max_wave_height - :special-members: __init__ - :members: diff --git a/docs/source/sections/Dataloaders/scalar/implemented/ERA5SigWaveHeight.rst b/docs/source/sections/Dataloaders/scalar/implemented/ERA5SigWaveHeight.rst deleted file mode 100644 index 6762fe88..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/ERA5SigWaveHeight.rst +++ /dev/null @@ -1,24 +0,0 @@ -*************************************** -ERA5 Significant Wave Height Dataloader -*************************************** - -ERA5 is a family of data products produced by the European Centre for Medium-Range Weather Forecasts (ECMWF). -It is the fifth generation ECMWF atmospheric reanalysis of the global climate covering the period from January 1950 to present. - -From their website: - - ERA5 provides hourly estimates of a large number of atmospheric, - land and oceanic climate variables. The data cover the Earth on a - 30km grid and resolve the atmosphere using 137 levels from the - surface up to a height of 80km. ERA5 includes information about - uncertainties for all variables at reduced spatial and temporal resolutions. - -Name in config: :code:`'era5_sig_wave_height'` - -Instructions for how to download their data products are -available `here `_ - - -.. 
automodule:: meshiphi.dataloaders.scalar.era5_sig_wave_height - :special-members: __init__ - :members: diff --git a/docs/source/sections/Dataloaders/scalar/implemented/ERA5WaveDirection.rst b/docs/source/sections/Dataloaders/scalar/implemented/ERA5WaveDirection.rst deleted file mode 100644 index ac559419..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/ERA5WaveDirection.rst +++ /dev/null @@ -1,24 +0,0 @@ -*********************************** -ERA5 Mean Wave Direction Dataloader -*********************************** - -ERA5 is a family of data products produced by the European Centre for Medium-Range Weather Forecasts (ECMWF). -It is the fifth generation ECMWF atmospheric reanalysis of the global climate covering the period from January 1950 to present. - -From their website: - - ERA5 provides hourly estimates of a large number of atmospheric, - land and oceanic climate variables. The data cover the Earth on a - 30km grid and resolve the atmosphere using 137 levels from the - surface up to a height of 80km. ERA5 includes information about - uncertainties for all variables at reduced spatial and temporal resolutions. - -Name in config: :code:`'era5_wave_dir'` - -Instructions for how to download their data products are -available `here `_ - - -.. automodule:: meshiphi.dataloaders.scalar.era5_mean_wave_direction - :special-members: __init__ - :members: diff --git a/docs/source/sections/Dataloaders/scalar/implemented/ERA5WavePeriod.rst b/docs/source/sections/Dataloaders/scalar/implemented/ERA5WavePeriod.rst deleted file mode 100644 index 4176caab..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/ERA5WavePeriod.rst +++ /dev/null @@ -1,24 +0,0 @@ -******************************** -ERA5 Mean Wave Period Dataloader -******************************** - -ERA5 is a family of data products produced by the European Centre for Medium-Range Weather Forecasts (ECMWF). 
-It is the fifth generation ECMWF atmospheric reanalysis of the global climate covering the period from January 1950 to present. - -From their website: - - ERA5 provides hourly estimates of a large number of atmospheric, - land and oceanic climate variables. The data cover the Earth on a - 30km grid and resolve the atmosphere using 137 levels from the - surface up to a height of 80km. ERA5 includes information about - uncertainties for all variables at reduced spatial and temporal resolutions. - -Name in config: :code:`'era5_wave_period'` - -Instructions for how to download their data products are -available `here `_ - - -.. automodule:: meshiphi.dataloaders.scalar.era5_wave_period - :special-members: __init__ - :members: diff --git a/docs/source/sections/Dataloaders/scalar/implemented/ERA5WindDirection.rst b/docs/source/sections/Dataloaders/scalar/implemented/ERA5WindDirection.rst deleted file mode 100644 index ccbcdf6a..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/ERA5WindDirection.rst +++ /dev/null @@ -1,24 +0,0 @@ -****************************** -ERA5 Wind Direction Dataloader -****************************** - -ERA5 is a family of data products produced by the European Centre for Medium-Range Weather Forecasts (ECMWF). -It is the fifth generation ECMWF atmospheric reanalysis of the global climate covering the period from January 1950 to present. - -From their website: - - ERA5 provides hourly estimates of a large number of atmospheric, - land and oceanic climate variables. The data cover the Earth on a - 30km grid and resolve the atmosphere using 137 levels from the - surface up to a height of 80km. ERA5 includes information about - uncertainties for all variables at reduced spatial and temporal resolutions. - -Name in config: :code:`'era5_wind_dir'` - -Instructions for how to download their data products are -available `here `_ - - -.. 
automodule:: meshiphi.dataloaders.scalar.era5_wind_dir - :special-members: __init__ - :members: diff --git a/docs/source/sections/Dataloaders/scalar/implemented/ERA5WindMagnitude.rst b/docs/source/sections/Dataloaders/scalar/implemented/ERA5WindMagnitude.rst deleted file mode 100644 index 0735f36c..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/ERA5WindMagnitude.rst +++ /dev/null @@ -1,24 +0,0 @@ -****************************** -ERA5 Wind Magnitude Dataloader -****************************** - -ERA5 is a family of data products produced by the European Centre for Medium-Range Weather Forecasts (ECMWF). -It is the fifth generation ECMWF atmospheric reanalysis of the global climate covering the period from January 1950 to present. - -From their website: - - ERA5 provides hourly estimates of a large number of atmospheric, - land and oceanic climate variables. The data cover the Earth on a - 30km grid and resolve the atmosphere using 137 levels from the - surface up to a height of 80km. ERA5 includes information about - uncertainties for all variables at reduced spatial and temporal resolutions. - -Name in config: :code:`'era5_wind_mag'` - -Instructions for how to download their data products are -available `here `_ - - -.. automodule:: meshiphi.dataloaders.scalar.era5_wind_mag - :special-members: __init__ - :members: diff --git a/docs/source/sections/Dataloaders/scalar/implemented/GEBCO.rst b/docs/source/sections/Dataloaders/scalar/implemented/GEBCO.rst deleted file mode 100644 index 4e1d17d7..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/GEBCO.rst +++ /dev/null @@ -1,15 +0,0 @@ -**************** -GEBCO Dataloader -**************** - -The General Bathymetric Chart of the Oceans (GEBCO) is a publicly available -bathymetric chart of the Earth's oceans. It is a common resource used by -ocean scientists, amongst others. - -Name in config: :code:`'gebco'` - -Data can be downloaded from `here `_ - -.. 
automodule:: meshiphi.dataloaders.scalar.gebco - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/scalar/implemented/IceNet.rst b/docs/source/sections/Dataloaders/scalar/implemented/IceNet.rst deleted file mode 100644 index a2fea4fd..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/IceNet.rst +++ /dev/null @@ -1,21 +0,0 @@ -***************** -IceNet Dataloader -***************** - -IceNet is a seasonal sea ice forecasting tool being developed by researchers -at the British Antarctic Survey. From the website: - - IceNet is a probabilistic, deep learning sea ice forecasting system - developed by an international team and led by British Antarctic Survey - and The Alan Turing Institute [Andersson et al., 2021]. IceNet has been - trained on climate simulations and observational data to forecast the - next 6 months of monthly-averaged sea ice concentration maps. - -Name in config: :code:`'icenet'` - -Data for IceNet V1 is available from `here `_ -Data for IceNet V2 is not publicly available. - -.. automodule:: meshiphi.dataloaders.scalar.icenet - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/scalar/implemented/MODIS.rst b/docs/source/sections/Dataloaders/scalar/implemented/MODIS.rst deleted file mode 100644 index 81168511..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/MODIS.rst +++ /dev/null @@ -1,19 +0,0 @@ -**************** -MODIS Dataloader -**************** - -Moderate Resolution Imaging Spectroradiometer (MODIS) is a satellite-borne -instrument developed by NASA. - -From their website: - MODIS are viewing the entire Earth's surface every 1 to 2 days, - acquiring data in 36 spectral bands, or groups of wavelengths. - -Name in config: :code:`'modis'` - -Information on where to download their data products can be -found `here `_ - -.. 
automodule:: meshiphi.dataloaders.scalar.modis - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/scalar/implemented/ScalarCSV.rst b/docs/source/sections/Dataloaders/scalar/implemented/ScalarCSV.rst deleted file mode 100644 index 3c59d02e..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/ScalarCSV.rst +++ /dev/null @@ -1,14 +0,0 @@ -********************* -Scalar CSV Dataloader -********************* - -The scalar CSV dataloader is designed to take any `.csv` file and cast -it into a data source for mesh construction. It was primarily used in testing -for loading dummy data to test performance. As such, there is no data source -for this dataloader. - -Name in config: :code:`'scalar_csv'` - -.. automodule:: meshiphi.dataloaders.scalar.scalar_csv - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/scalar/implemented/ScalarGRF.rst b/docs/source/sections/Dataloaders/scalar/implemented/ScalarGRF.rst deleted file mode 100644 index a2da6753..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/ScalarGRF.rst +++ /dev/null @@ -1,48 +0,0 @@ -.. _scalar-grf: - -********************* -Scalar GRF Dataloader -********************* - -Produces a gaussian random field of scalar values, useful for producing -artificial, yet somewhat realistic values for real-world variables. - -Name in config: :code:`'scalar_grf'` - -Can be used to generate :ref:`binary masks`. - -For vector fields, see the :ref:`Vector GRF page`. - -.. code-block:: - :caption: Default parameters for scalar GRF dataloader. - - { - "loader": "scalar_grf", - "params":{ - "data_name": "data", # - Name of the data column - "seed": None, # - Seed for random number generator. Must - # be int or None. 
None sets a random seed - "size": 512, # - Number of datapoints per lat/long axis - "alpha": 3, # - Power of the power-law momentum - # distribution used to generate GRF - "binary": False, # - Flag specifying this GRF isn't a binary mask - "threshold": [0, 1], # - Caps of min/max values to ensure normalising - # not skewed by outlier in randomised GRF - "min": -10, # - Minimum value of GRF - "max": 10, # - Maximum value of GRF - "multiplier": 1, # - Multiplier for entire dataset - "offset": 0 # - Offset for entire dataset - } - } - -NOTE: min/max are set BEFORE multiplier and offset are used. The actual values for -the min and max are - -| :code:`actual_min = multiplier * min + offset` -| :code:`actual_max = multiplier * max + offset` - -The dataloader is implemented as follows: - -.. automodule:: meshiphi.dataloaders.scalar.scalar_grf - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/scalar/implemented/Shape.rst b/docs/source/sections/Dataloaders/scalar/implemented/Shape.rst deleted file mode 100644 index b6702655..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/Shape.rst +++ /dev/null @@ -1,12 +0,0 @@ -**************** -Shape Dataloader -**************** - -The shape dataloader is designed to create abstract shapes with well known -boundaries, and cast it into a data source for mesh construction. It was primarily -used in testing to debug cellbox generation. As such, there is no data source -for this dataloader. - -.. 
automodule:: meshiphi.dataloaders.scalar.shape - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/scalar/implemented/visual_iced.rst b/docs/source/sections/Dataloaders/scalar/implemented/visual_iced.rst deleted file mode 100644 index 8328bd12..00000000 --- a/docs/source/sections/Dataloaders/scalar/implemented/visual_iced.rst +++ /dev/null @@ -1,22 +0,0 @@ -************************* -Visual_iced Dataloader -************************* - -Visual_iced is a dataloader for .tiff images, which are outputs from the visual_iced library -developed by Martin Rogers at the British Antarctic Survey's AI Lab. These visual_iced -images are ice/water binary files, generated from a combination of MODIS and SAR -satellite imagery. - -In the source data, 0s are representative of open water, and 1s are representative of -ice. In the dataloader, we map these values to sea ice concentration, in the range of 0 to 100. -Values between 0 and 100 are generated by the aggregation of the 0s and 1s within each cell. - -.. note:: - The visual_iced dataloader only supports loading in single files, as the visual_iced datasets - are not temporally continuous within a given boundary. - -Name in config: :code:`'visual_iced'` - -.. automodule:: meshiphi.dataloaders.scalar.visual_iced - :special-members: __init__ - :members: diff --git a/docs/source/sections/Dataloaders/scalar/index.rst b/docs/source/sections/Dataloaders/scalar/index.rst deleted file mode 100644 index d0f099f4..00000000 --- a/docs/source/sections/Dataloaders/scalar/index.rst +++ /dev/null @@ -1,88 +0,0 @@ -.. _abstract-scalar-dataloader-index: - -****************** -Scalar Dataloaders -****************** - - - -^^^^^^^^^^^^^^^^^^^^^^^^^^ -Abstract Scalar Base Class -^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. 
toctree:: - :maxdepth: 1 - :glob: - - ./abstractScalar - -The Abstract Base Class of the scalar dataloaders holds most of the -functionality that would be needed to manipulate the data to work -with the mesh. When creating a new dataloader, the user must define -how to open the data files, and what methods are required to manipulate -the data into a standard format. More details are provided on the -:ref:`abstractScalar doc page`. - -^^^^^^^^^^^^^^^^^^^^^^^^^^ -Scalar Dataloader Examples -^^^^^^^^^^^^^^^^^^^^^^^^^^ -Data must be imported and saved as an xarray.Dataset, or a pandas.DataFrame object. -Below is a simple example of how to load in a NetCDF file:: - - from meshiphi.Dataloaders.Scalar.AbstractScalar import ScalarDataLoader - import xarray as xr - import logging - - class MyDataLoader(ScalarDataLoader): - - def import_data(self, bounds): - logging.debug("Importing my data...") - # Open Dataset - if len(self.files) == 1: data = xr.open_dataset(self.files[0]) - else: data = xr.open_mfdataset(self.files) - - # Rename coordinate columns to 'lat', 'long', 'time' if they aren't already - data = data.rename({'lon':'long'}) - - # Limit to initial boundary - data = self.trim_data(bounds, data=data) - - return data - - -Sometimes there are parameters that are constant for a data source, but are not -constant for all data sources. Default values are defined in the dataloader :code:`add_default_params()`. 
-Below is an example of setting default parameters for reprojection of a dataset:: - - class MyDataLoader(ScalarDataLoader): - def add_default_params(self, params): - # Add all the regular default params that scalar dataloaders have - params = super().add_default_params(params) # This line MUST be included - - # Define projection of dataset being imported - params['in_proj'] = 'EPSG:3412' - # Define projection required by output - params['out_proj'] = 'EPSG:4326' # default is EPSG:4326, so strictly - # speaking this line is not necessary - - # Coordinates in dataset that will be reprojected into long/lat - params['x_col'] = 'x' # Becomes 'long' - params['y_col'] = 'y' # Becomes 'lat' - - return params - - def import_data(self, bounds): - # Open Dataset - data = xr.open_mfdataset(self.files) - - # Can't easily determine bounds of data in wrong projection, so skipping for now - return data - -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Implemented Scalar Dataloaders -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. toctree:: - :maxdepth: 1 - :glob: - - ./implemented/* \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/vector/abstractVector.rst b/docs/source/sections/Dataloaders/vector/abstractVector.rst deleted file mode 100644 index 965999f8..00000000 --- a/docs/source/sections/Dataloaders/vector/abstractVector.rst +++ /dev/null @@ -1,9 +0,0 @@ -.. _abstract-vector-dataloader: - -************************** -Abstract Vector Dataloader -************************** - -.. 
automodule:: meshiphi.dataloaders.vector.abstract_vector - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/vector/implemented/BalticCurrent.rst b/docs/source/sections/Dataloaders/vector/implemented/BalticCurrent.rst deleted file mode 100644 index 59f3363d..00000000 --- a/docs/source/sections/Dataloaders/vector/implemented/BalticCurrent.rst +++ /dev/null @@ -1,22 +0,0 @@ -************************** -Baltic Currents Dataloader -************************** - -Baltic current values are provided by the Finnish Meteorological Institute (FMI). -From their webpage: - - This CMEMS Baltic Sea Physical Reanalysis product provides a physical reanalysis - for the whole Baltic Sea area, inclusive the Transition Area to the North Sea. - The surface variables are available every hour and include sea surface height, - ice concentration and total ice thickness. The other variables, available as daily - and monthly means, are salinity, temperature, horizontal current components, - mixed layer depth, bottom salinity and bottom temperature. - -Name in config: :code:`'baltic_currents'` - -Data can be downloaded from `here `_ - - -.. automodule:: meshiphi.dataloaders.vector.baltic_current - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/vector/implemented/DUACS.rst b/docs/source/sections/Dataloaders/vector/implemented/DUACS.rst deleted file mode 100644 index 96f714e1..00000000 --- a/docs/source/sections/Dataloaders/vector/implemented/DUACS.rst +++ /dev/null @@ -1,24 +0,0 @@ -************************* -DUACS Currents Dataloader -************************* - -DUACS is a European operational multi-mission production system of altimeter data that provides (amongst other products) -global ocean current vectors. The system was developed by CNES/CLS and data is available from the copernicus marine data -service. 
- -From their website: - Altimeter satellite gridded Sea Level Anomalies (SLA) computed with respect to a twenty-year 1993, 2012 mean. The SLA - is estimated by Optimal Interpolation, merging the L3 along-track measurement from the different altimeter missions - available. Part of the processing is fitted to the Global Ocean. The product gives additional variables (i.e. - Absolute Dynamic Topography and geostrophic currents). - -Name in config: :code:`'duacs_currents'` - -Near real-time data can be downloaded from `here `_. - -Reanalysis data can be downloaded from `here. `_ - - -.. automodule:: meshiphi.dataloaders.vector.duacs_current - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/vector/implemented/ERA5WaveDirection.rst b/docs/source/sections/Dataloaders/vector/implemented/ERA5WaveDirection.rst deleted file mode 100644 index 2e773bed..00000000 --- a/docs/source/sections/Dataloaders/vector/implemented/ERA5WaveDirection.rst +++ /dev/null @@ -1,27 +0,0 @@ -****************************** -ERA5 Wave Direction Dataloader -****************************** - -ERA5 is a family of data products produced by the European Centre for Medium-Range Weather Forecasts (ECMWF). -It is the fifth generation ECMWF atmospheric reanalysis of the global climate covering the period from January 1950 to present. - -From their website: - - ERA5 provides hourly estimates of a large number of atmospheric, - land and oceanic climate variables. The data cover the Earth on a - 30km grid and resolve the atmosphere using 137 levels from the - surface up to a height of 80km. ERA5 includes information about - uncertainties for all variables at reduced spatial and temporal resolutions. 
- -Name in config: :code:`'era5_wave_direction'` - -Instructions for how to download their data products are -available `here `_ - -This dataloader takes the mean wave direction variable, which gives the direction the waves are coming from as an angle -from north in degrees, and converts it to a unit vector with u and v components. - - -.. automodule:: meshiphi.dataloaders.vector.era5_wave_direction_vector - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/vector/implemented/ERA5Wind.rst b/docs/source/sections/Dataloaders/vector/implemented/ERA5Wind.rst deleted file mode 100644 index e5e5c72e..00000000 --- a/docs/source/sections/Dataloaders/vector/implemented/ERA5Wind.rst +++ /dev/null @@ -1,24 +0,0 @@ -******************** -ERA5 Wind Dataloader -******************** - -ERA5 is a family of data products produced by the European Centre for Medium-Range Weather Forecasts (ECMWF). -It is the fifth generation ECMWF atmospheric reanalysis of the global climate covering the period from January 1950 to present. - -From their website: - - ERA5 provides hourly estimates of a large number of atmospheric, - land and oceanic climate variables. The data cover the Earth on a - 30km grid and resolve the atmosphere using 137 levels from the - surface up to a height of 80km. ERA5 includes information about - uncertainties for all variables at reduced spatial and temporal resolutions. - -Name in config: :code:`'era5_wind'` - -Instructions for how to download their data products are -available `here `_ - - -.. 
automodule:: meshiphi.dataloaders.vector.era5_wind - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/vector/implemented/NorthSeaCurrent.rst b/docs/source/sections/Dataloaders/vector/implemented/NorthSeaCurrent.rst deleted file mode 100644 index 73ac01b7..00000000 --- a/docs/source/sections/Dataloaders/vector/implemented/NorthSeaCurrent.rst +++ /dev/null @@ -1,17 +0,0 @@ -***************************** -North Sea Currents Dataloader -***************************** - -North Atlantic Ocean currents are provided by the Proudman Oceanographic Laboratory -Coastal-Ocean Modelling System (POLCOMS). Their dataset was generated by the UK National -Oceanography Centre, Liverpool. - -Name in config: :code:`'northsea_currents'` - -More information on where to download the data is -available `here `_ - - -.. automodule:: meshiphi.dataloaders.vector.north_sea_current - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/vector/implemented/ORAS5Current.rst b/docs/source/sections/Dataloaders/vector/implemented/ORAS5Current.rst deleted file mode 100644 index df246f58..00000000 --- a/docs/source/sections/Dataloaders/vector/implemented/ORAS5Current.rst +++ /dev/null @@ -1,22 +0,0 @@ -************************* -ORAS5 Currents Dataloader -************************* - -Ocean Reanalysis System 5 (ORAS5) is a publicly available dataset providing -estimated values for many different ocean parameters, including ocean currents. - -From their website: - - This dataset provides global ocean and sea-ice reanalysis - (ORAS5: Ocean Reanalysis System 5) monthly mean data prepared by - the European Centre for Medium-Range Weather Forecasts (ECMWF) - OCEAN5 ocean analysis-reanalysis system. This system comprises 5 ensemble - members from which one member is published in this catalogue entry. - -Name in config: :code:`'oras5_currents'` - -Data can be downloaded from `here `_ - -.. 
automodule:: meshiphi.dataloaders.vector.oras5_current - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/vector/implemented/SOSE.rst b/docs/source/sections/Dataloaders/vector/implemented/SOSE.rst deleted file mode 100644 index 5eeab27e..00000000 --- a/docs/source/sections/Dataloaders/vector/implemented/SOSE.rst +++ /dev/null @@ -1,22 +0,0 @@ -************************ -SOSE Currents Dataloader -************************ - -Southern Ocean State Estimate (SOSE) is a publicly available dataset that provides (amongst other products) -ocean current vectors of the southern ocean. It is a project led by Mazloff at the Scripps Institution of Oceanography. - -From their website: - The Southern Ocean State Estimate (SOSE) is a model-generated best fit to Southern Ocean - observations. As such, it provides a quantitatively useful climatology of the mean-state - of the Southern Ocean. - -Name in config: :code:`'sose'` - -Data can be downloaded from `here `_ - -Note: This dataloader may not work as is for new data downloaded, it has been internally collated into -a more easily ingestable format. - -.. automodule:: meshiphi.dataloaders.vector.sose - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/vector/implemented/VectorCSV.rst b/docs/source/sections/Dataloaders/vector/implemented/VectorCSV.rst deleted file mode 100644 index f4ff33ba..00000000 --- a/docs/source/sections/Dataloaders/vector/implemented/VectorCSV.rst +++ /dev/null @@ -1,14 +0,0 @@ -********************* -Vector CSV Dataloader -********************* - -The vector CSV dataloader is designed to take any `.csv` file and cast -it into a data source for mesh construction. It was primarily used in testing -for loading dummy data to test performance. As such, there is no data source -for this dataloader. - -Name in config: :code:`'vector_csv'` - -.. 
automodule:: meshiphi.dataloaders.vector.vector_csv - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/vector/implemented/VectorGRF.rst b/docs/source/sections/Dataloaders/vector/implemented/VectorGRF.rst deleted file mode 100644 index 65471d69..00000000 --- a/docs/source/sections/Dataloaders/vector/implemented/VectorGRF.rst +++ /dev/null @@ -1,40 +0,0 @@ -.. _vector-grf: - -********************* -Vector GRF Dataloader -********************* - -Produces a gaussian random field of vector values, useful for producing -artificial, yet somewhat realistic values for real-world variables. -Values are broken down into `x` and `y` components, and saved in two -columns in the final dataframe. - -Name in config: :code:`'vector_grf'` - -Can be used to generate :ref:`binary masks`. - -For scalar fields, see the :ref:`Scalar GRF page`. - -.. code-block:: - :caption: Default parameters for vector GRF dataloader - - { - "loader": "vector_grf", - "params":{ - "vec_x": "uC", # - Name of the first data column - "vec_y": "vC", # - Name of the second data column - "seed": None, # - Seed for random number generator. Must - # be int or None. None sets a random seed - "size": 512, # - Number of datapoints per lat/long axis - "alpha": 3, # - Power of the power-law momentum - # distribution used to generate GRF - "min": 0, # - Minimum value of vector magnitude - "max": 10 # - Maximum value of vector magnitude - } - } - -The dataloader is implemented as follows: - -.. automodule:: meshiphi.dataloaders.vector.vector_grf - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Dataloaders/vector/index.rst b/docs/source/sections/Dataloaders/vector/index.rst deleted file mode 100644 index a8aa2139..00000000 --- a/docs/source/sections/Dataloaders/vector/index.rst +++ /dev/null @@ -1,93 +0,0 @@ -.. 
_abstract-vector-dataloader-index: - -****************** -Vector Dataloaders -****************** - - -^^^^^^^^^^^^^^^^^^^^^^^^^^ -Abstract Vector Base Class -^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. toctree:: - :maxdepth: 1 - :glob: - - ./abstractVector - -The Abstract Base Class of the vector dataloaders holds most of the -functionality that would be needed to manipulate the data to work -with the mesh. When creating a new dataloader, the user must define -how to open the data files, and what methods are required to manipulate -the data into a standard format. More details are provided on the -:ref:`abstractVector doc page` - - -^^^^^^^^^^^^^^^^^^^^^^^^^^ -Vector Dataloader Examples -^^^^^^^^^^^^^^^^^^^^^^^^^^ -Creating a vector dataloader is almost identical to creating a -:ref:`scalar dataloader`. The key differences -are that the `VectorDataLoader` abstract base class must be used, and that -the `data_name` is a comma separated string of the vector component names. -e.g. a dataloader storing a vector with column names :code:`uC` and -:code:`vC` will have an attribute :code:`self.data_name = 'uC,vC'` -Data must be imported and saved as an xarray.Dataset, or a -pandas.DataFrame object. Below is a simple example of how to load in a -NetCDF file:: - - from meshiphi.Dataloaders.Scalar.AbstractScalar import VectorDataLoader - import xarray as xr - import logging - - class MyDataLoader(VectorDataLoader): - def import_data(self, bounds): - logging.debug("Importing my data...") - # Open Dataset - logging.debug(f"- Opening file {self.file}") - data = xr.open_dataset(self.file) - - # Rename coordinate columns to 'lat', 'long', 'time' if they aren't already - data = data.rename({'lon':'long'}) - - # Limit to initial boundary - data = self.trim_data(bounds, data=data) - - return data - - -Similar to scalar data loaders, sometimes there are parameters that are constant -for a data source, but are not constant for all data sources. 
Default values may -be defined either in the dataloader factory, or within the dataloader itself. -Below is an example of setting default parameters for reprojection of a dataset:: - - class MyDataLoader(ScalarDataLoader): - def add_default_params(self, params): - # Add all the regular default params that scalar dataloaders have - params = super().add_default_params(params) # This line MUST be included - - # Define projection of dataset being imported - params['in_proj'] = 'EPSG:3412' - # Define projection required by output - params['out_proj'] = 'EPSG:4326' # default is EPSG:4326, so strictly - # speaking this line is not necessary - - # Coordinates in dataset that will be reprojected into long/lat - params['x_col'] = 'x' # Becomes 'long' - params['y_col'] = 'y' # Becomes 'lat' - - def import_data(self, bounds): - # Open Dataset - data = xr.open_mfdataset(self.file) - - # Can't easily determine bounds of data in wrong projection, so skipping for now - return data - -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Implemented Vector Dataloaders -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. toctree:: - :maxdepth: 1 - :glob: - - ./implemented/* \ No newline at end of file diff --git a/docs/source/sections/Examples.rst b/docs/source/sections/Examples.rst deleted file mode 100644 index ffb36036..00000000 --- a/docs/source/sections/Examples.rst +++ /dev/null @@ -1,71 +0,0 @@ -##################################### -Examples -##################################### - -Digital environment files (meshes) can be created using the MeshiPhi package, either through the -command line interface (CLI) or through the python terminal. This section will provide examples of how to create a digital -environment file using Python. - - -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Creating the Digital Environment. -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -A configuration file is needed to initialise the **Mesh** object which forms the digital environment. 
This -configuration file is of the same format used in the :ref:`create_mesh` CLI entry-point, and may either be loaded from a -*json* file or constructed within a python interpreter. - -Loading configuration information from a *json* file: -:: - - import json - with open('examples/environment_config/grf_example.config.json', 'r') as f: - config = json.load(f) - - -The digital environment **Mesh** object can then be initialised. This mesh object will be constructed using parameters in its -configuration file. This mesh object can be manipulated further, such as increasing its resolution through further -splitting, adding additional data sources or altering its configuration parameters using functions listed in -the :ref:`Methods - Mesh Construction` section of the documentation. The digital environment **Mesh** object can then be cast to -a json object and saved to a file. -:: - - from meshiphi.mesh_generation.mesh_builder import MeshBuilder - - cg = MeshBuilder(config).build_environmental_mesh() - - mesh = cg.to_json() - -The **Mesh** object can be visualised using the `GeoPlot `_ package, also developed -by BAS. This package is not included in the distribution of MeshiPhi, but can be installed using the following command: - -:: - - pip install bas_geoplot - -**GeoPlot** can be used to visualise the **Mesh** object using the following code in an iPython notebook or -any python interpreter: - -:: - - from bas_geoplot.interactive import Map - - mesh = pd.DataFrame(mesh_json['cellboxes']) - mp = Map(title="GRF Example") - - mp.Maps(mesh, 'MeshGrid', predefined='cx') - mp.Maps(mesh, 'SIC', predefined='SIC') - mp.Maps(mesh, 'Elevation', predefined='Elev', show=False) - mp.Vectors(mesh,'Currents', show=False, predefined='Currents') - mp.Vectors(mesh, 'Winds', predefined='Winds', show=False) - - mp.show() - -The prior should produce a plot which shows the digital environment, including sea ice concentration, elevation, currents and wind. - -.. _splitting_figure: -.. 
figure:: ./Figures/grf_example_mesh.png - :align: center - :width: 700 - - *Plot showing the expected output of running bas_geoplot on the GRF example mesh provided* \ No newline at end of file diff --git a/docs/source/sections/Installation.rst b/docs/source/sections/Installation.rst deleted file mode 100644 index 9d7773cc..00000000 --- a/docs/source/sections/Installation.rst +++ /dev/null @@ -1,82 +0,0 @@ -************ -Installation -************ - -In this section we outline the necessary steps for installing the MeshiPhi software package. MeshiPhi requires a -pre-existing installation of Python 3.8 or higher. - - -Installing MeshiPhi -##################### - -MeshiPhi can be installed from one of the following two sources: - -from PyPI: -:: - - pip install MeshiPhi - -from Github: -:: - - git clone https://github.com/bas-amop/MeshiPhi.git - cd MeshiPhi - pip install . - - -Installing GDAL (Optional) -########################## - -MeshiPhi has GDAL as an optional requirement. It is only used when exporting TIFF images, so if this is not useful to -you, we would recommend steering clear. It is not trivial and is a common source of problems. -With that said, below are instructions for various operating systems. - -Windows -******* - -.. note:: - We assume a version of Windows 10 or higher, with a working version of Python 3.9 including pip installed. - We recommend installing MeshiPhi into a virtual environment. 
- -Windows: - -:: - - pip install pipwin # pipwin is a package that allows for easy installation of windows binaries - pipwin install gdal - pipwin install fiona - - -Linux/MacOS -*********** - -Ubuntu/Debian: - -:: - - sudo add-apt-repository ppa:ubuntugis/ppa - sudo apt-get update - sudo apt-get install gdal-bin libgdal-dev - export CPLUS_INCLUDE_PATH=/usr/include/gdal - export C_INCLUDE_PATH=/usr/include/gdal - pip install GDAL==$(gdal-config --version) - - -Fedora: - -:: - - sudo dnf update - sudo dnf install gdal gdal-devel - export CPLUS_INCLUDE_PATH=/usr/include/gdal - export C_INCLUDE_PATH=/usr/include/gdal - pip install GDAL==$(gdal-config --version) - - -MacOS (with HomeBrew): - -:: - - brew install gdal --HEAD - brew install gdal - pip install GDAL==$(gdal-config --version) \ No newline at end of file diff --git a/docs/source/sections/Mesh_Construction/Mesh_construction_classes.rst b/docs/source/sections/Mesh_Construction/Mesh_construction_classes.rst deleted file mode 100644 index c4bc935a..00000000 --- a/docs/source/sections/Mesh_Construction/Mesh_construction_classes.rst +++ /dev/null @@ -1,84 +0,0 @@ -############################# -Mesh Construction - Classes -############################# - -This section describes the main classes of the Mesh Construction module in detail. -For an overview of the abstractions behind the Mesh Construction module, see the -:ref:`Mesh Construction - Overview` section of the documentation. - -MeshBuilder -************ -The MeshBuilder object is the main class of the Mesh Construction module. It is used to build the -EnvironmentalMesh object from a collection of geospatial data. Features of the created EnvironmentalMesh -can be set using a configuration file passed to the MeshBuilder object. For more information on the format -of the configuration file, see the :ref:`configuration - mesh construction` section of the documentation. - -.. automodule:: meshiphi.mesh_generation.mesh_builder - -..
autoclass:: meshiphi.mesh_generation.mesh_builder.MeshBuilder - :special-members: __init__ - :members: build_environmental_mesh , split_and_replace, split_to_depth, add_dataloader - -EnvironmentMesh -***************** -The EnvironmentMesh object is a collection of geospatial boundaries containing an aggregated representation -of the data contained within the boundaries (AggregatedCellBox objects). The EnvironmentMesh object is -created by the MeshBuilder object, though the object is mutable and can be updated after construction. - -.. automodule:: meshiphi.mesh_generation.environment_mesh - -.. autoclass:: meshiphi.mesh_generation.environment_mesh.EnvironmentMesh - :special-members: __init__ - :members: load_from_json, update_cellbox , to_json, to_geojson, to_tif, save, merge_mesh, split_and_replace - -NeighbourGraph -*************** -The NeighbourGraph object is used to store the connectivity information between the cells of the EnvironmentMesh. -The NeighbourGraph object is created by the MeshBuilder object and is encoded into the EnvironmentalMesh. - -.. automodule:: meshiphi.mesh_generation.neighbour_graph - -.. autoclass:: meshiphi.mesh_generation.neighbour_graph.NeighbourGraph - :members: initialise_neighbour_graph, get_neighbour_case, update_neighbours - -CellBox -*************** -The CellBox object is used to store the data contained within a geospatial boundary in the MeshBuilder. -The CellBox object is created by the MeshBuilder object and transformed into an AggregatedCellBox object -when the MeshBuilder returns the EnvironmentalMesh object. - -.. automodule:: meshiphi.mesh_generation.cellbox - -.. autoclass:: meshiphi.mesh_generation.cellbox.CellBox - :special-members: __init__ - :members: set_data_source, should_split, split, set_parent, aggregate - -MetaData -*********** -The Metadata object is used to store the metadata associated with a CellBox object within the MeshBuilder. 
This includes -associated DataLoaders, the depth of the CellBox within the MeshBuilder, and the parent CellBox of the CellBox among others. - -.. automodule:: meshiphi.mesh_generation.metadata - -.. autoclass:: meshiphi.mesh_generation.metadata.Metadata - :special-members: __init__ - -AggregatedCellBox -****************** -An aggregated representation of the data contained within a geospatial boundary. The AggregatedCellBox object is created -by the CellBox object when the MeshBuilder returns the EnvironmentalMesh. - -.. automodule:: meshiphi.mesh_generation.aggregated_cellbox - -.. autoclass:: meshiphi.mesh_generation.aggregated_cellbox.AggregatedCellBox - :special-members: __init__ - :members: contains_point, to_json - - - - - - - - - diff --git a/docs/source/sections/Mesh_Construction/Mesh_validation.rst b/docs/source/sections/Mesh_Construction/Mesh_validation.rst deleted file mode 100644 index 5155c54c..00000000 --- a/docs/source/sections/Mesh_Construction/Mesh_validation.rst +++ /dev/null @@ -1,19 +0,0 @@ -######################## -Mesh Validation -######################## - -TODO - add description - -==================== -Mesh Validator -==================== -.. automodule:: meshiphi.mesh_validation.mesh_validator - :special-members: __init__ - :members: - -==================== -Sampler -==================== -.. automodule:: meshiphi.mesh_validation.sampler - :special-members: __init__ - :members: \ No newline at end of file diff --git a/docs/source/sections/Outputs.rst b/docs/source/sections/Outputs.rst deleted file mode 100644 index 69461649..00000000 --- a/docs/source/sections/Outputs.rst +++ /dev/null @@ -1,135 +0,0 @@ -.. _outputs: - -******************** -Outputs - Data Types -******************** - -###################### -The Mesh.json file -###################### - -Once a mesh has been constructed using MeshiPhi, it can then be exported as a json object and saved to a file. 
An example -of mesh construction and json object generation is as follows: - -:: - - from meshiphi.mesh import Mesh - - with open('config.json', 'r') as f: - config = json.load(f) - - mesh = Mesh(config) - mesh_json = mesh.to_json() - -.. note:: - Examples and a description of the configuration files can be found in - the :ref:`configuration - mesh construction` section of this document. - - -The json object outputted by the Mesh consists of 3 sections: **config**, -**cellboxes** and **neighbour_graph**. - -:: - - { - "config": { - ... - }, - "cellboxes": [ - {...}, - ... - {...} - ], - "neighbour_graph": { - "id_1": { - ... - }, - ... - "id_n": { - ... - } - } - } - -where the parts of the json object can be understood as follows: - -* **config** : The configuration file used to generate the Mesh. -* **cellboxes** : A list of json representations of CellBox objects that form the Mesh. -* **neighbour_graph** : A graphical representation of the adjacency of CellBoxes within the Mesh. - -========= -cellboxes -========= - -Each CellBox object within **cellboxes** in the outputted json object is of -the following form: - -:: - - { - "id" (string): ..., - "geometry" (string): ..., - "cx" (float): ..., - "cy" (float): ..., - "dcx" (float): ..., - "dcy" (float): ..., - "value_1" (float): ..., - ... - "value_n" (float): ... - } - -Where the values within the CellBox represent the following: - -* **id** : The index of the CellBox within the Mesh. -* **geometry** : The spatial boundaries of the CellBox. -* **cx** : The x-position of the centroid of the CellBox, given in degrees longitude. -* **cy** : The y-position of the centroid of the CellBox, given in degrees latitude. -* **dcx** : The x-distance from the edge of the CellBox to the centroid of the CellBox. Given in degrees longitude. -* **dcy** : The y-distance from the edge of the CellBox to the centroid of the CellBox. Given in degrees latitude. - -..
figure:: ./Figures/cellbox_json.png - :align: center - :width: 700 - - -=============== -neighbour_graph -=============== - -For each CellBox in the **cellboxes** section of the json object, there will be a -corresponding entry in the **neighbour_graph**. - -.. note:: - Once the vehicle accessibility conditions have been applied to the json object, this may no longer be true - as inaccessible CellBoxes will be removed from *neighbour_graph* but will remain in *cellboxes* - -Each entry in the **neighbour_graph** is of the following form: - -:: - - "id": { - "1": [...], - "2": [...], - "3": [...], - "4": [...], - "-1": [...], - "-2": [...], - "-3": [...], - "-4": [...] - } - -where each of the values represents the following: - -* **id** : The id of a CellBox within *cellboxes* - * **1** : A list of id's of CellBoxes within *cellboxes* to the North-East of the CellBox specified by 'id'. - * **2** : A list of id's of CellBoxes within *cellboxes* to the East of the CellBox specified by 'id'. - * **3** : A list of id's of CellBoxes within *cellboxes* to the South-East of the CellBox specified by 'id'. - * **4** : A list of id's of CellBoxes within *cellboxes* to the South of the CellBox specified by 'id'. - * **-1** : A list of id's of CellBoxes within *cellboxes* to the South-West of the CellBox specified by 'id'. - * **-2** : A list of id's of CellBoxes within *cellboxes* to the West of the CellBox specified by 'id'. - * **-3** : A list of id's of CellBoxes within *cellboxes* to the North-West of the CellBox specified by 'id'. - * **-4** : A list of id's of CellBoxes within *cellboxes* to the North of the CellBox specified by 'id'. - -..
figure:: ./Figures/neighbour_graph_json.png - :align: center - :width: 700 diff --git a/docs/source/sections/Plotting/mesh_plotting.rst b/docs/source/sections/Plotting/mesh_plotting.rst deleted file mode 100644 index ed9ca5e1..00000000 --- a/docs/source/sections/Plotting/mesh_plotting.rst +++ /dev/null @@ -1,11 +0,0 @@ -############################ -Mesh Plotting -############################ - -As well as the interactive plotting functionality provided by `GeoPlot `_, -Meshiphi also contains a built-in plotting class :code:`mesh_plotter` that allows the user to produce static plots of a -given environmental mesh. - -.. automodule:: meshiphi.mesh_plotting.mesh_plotter - :special-members: __init__ - :members: \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 00000000..e82d010f --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,83 @@ +site_name: MeshiPhi +repo_url: https://github.com/bas-amop/MeshiPhi/ +edit_uri: edit/main/docs/ +theme: + name: material + logo: https://cdn.web.bas.ac.uk/bas-style-kit/0.7.3/img/logos-symbols/bas-roundel-inverse-transparent-64.png + favicon: https://cdn.web.bas.ac.uk/bas-style-kit/0.7.3/img/favicon/favicon-16x16.png + icon: + repo: fontawesome/brands/github + features: + # Enables copying of code blocks + - content.code.copy + # Enables "edit this page" to contribute to docs + - content.action.edit + palette: + # Palette toggle for light mode + - media: "(prefers-color-scheme: light)" + scheme: default + toggle: + icon: material/brightness-7 + name: Switch to dark mode + # Palette toggle for dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + toggle: + icon: material/brightness-4 + name: Switch to light mode + +plugins: + - search + - mkdocstrings: + handlers: + python: + paths: + - meshiphi + options: + docstring_style: google + # show_object_full_path: true + merge_init_into_class: true + show_docstring_functions: true + show_docstring_classes: true + show_source: false + - 
mkdocs-autoapi: + autoapi_dir: meshiphi + autoapi_add_nav_entry: true + autoapi_file_patterns: + - "*.py" + - include-markdown: + rewrite_relative_urls: true + +nav: + - Home: index.md + - overview.md + - installation.md + - examples.md + - cli.md + - Configuration: + - Overview: config/overview.md + - Mesh Construction: config/mesh_construction.md + - output.md + - Dataloaders: + - Overview: dataloaders/overview.md + - dataloaders/interface.md + - dataloaders/factory.md + - dataloaders/scalar.md + - dataloaders/vector.md + - dataloaders/lut.md + - dataloaders/adding.md + - Mesh Construction: + - Overview: mesh_construction/index.md + - Classes: mesh_construction/classes.md + - plotting.md + - development.md + - contributing.md + +markdown_extensions: + - toc: + permalink: True + - admonition + - pymdownx.details + - pymdownx.superfences + +copyright: Copyright © 2026 British Antarctic Survey \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 7cf3161c..91f0f691 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,11 +48,13 @@ dependencies = [ "rasterio" ] -[project.optional-dependencies] +[dependency-groups] docs = [ - "sphinx", - "sphinx_rtd_theme", - "sphinx_markdown_builder" + "mkdocs", + "mkdocs-autoapi", + "mkdocs-include-markdown-plugin", + "mkdocstrings[python]", + "mkdocs-material", ] test = [ "pytest",