diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 79a2964..c77213f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,6 +29,8 @@ repos: - id: codespell name: codespell description: Checks for common misspellings in text files + additional_dependencies: + - tomli - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.2.2 hooks: diff --git a/README.md b/README.md index c0093be..a50acb7 100644 --- a/README.md +++ b/README.md @@ -18,23 +18,24 @@ pip install GOSTurban 1. Clone or download this repository to your local machine. Then, navigate to the root directory of the repository: - ```shell - git clone https://github.com/worldbank/GOSTurban.git - cd GOSTurban - ``` + ```shell + git clone https://github.com/worldbank/GOSTurban.git + cd GOSTurban + ``` 2. Create a virtual environment (optional but recommended): - ```shell - python3 -m venv venv - source venv/bin/activate # On Windows, use `venv\Scripts\activate` - ``` + ```shell + python3 -m venv venv + source venv/bin/activate # On Windows, use `venv\Scripts\activate` + ``` 3. Install the package with dependencies: - ```shell - pip install . - ``` + ```shell + pip install . 
+ ``` + ### Developer Installation Install the package **in editable** mode with all of the dependencies needed to run the tests and build the documentation locally: diff --git a/notebooks/Implementations/JUP_SURGP_GLO_B_D__LEI_Evaluation.ipynb b/notebooks/Implementations/JUP_SURGP_GLO_B_D__LEI_Evaluation.ipynb index 3f107a7..67f0b29 100755 --- a/notebooks/Implementations/JUP_SURGP_GLO_B_D__LEI_Evaluation.ipynb +++ b/notebooks/Implementations/JUP_SURGP_GLO_B_D__LEI_Evaluation.ipynb @@ -17,7 +17,6 @@ "outputs": [], "source": [ "import os\n", - "import sys\n", "\n", "import rasterio\n", "import rasterio.features\n", @@ -29,8 +28,7 @@ "from rasterio.plot import show\n", "\n", "# Import GOST urban functions\n", - "sys.path.append(\"../../\")\n", - "from src.LEI import *" + "from GOSTurban.LEI import calculate_LEI, summarize_LEI" ] }, { @@ -58,7 +56,7 @@ " os.stat(os.path.join(root, \"GHSL.tif\")).st_size,\n", " )\n", " )\n", - " except:\n", + " except Exception:\n", " pass" ] }, @@ -67,7 +65,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Vizualize raster data - GHSL" + "# Visualize raster data - GHSL" ] }, { @@ -104,7 +102,7 @@ "source": [ "# write out raster to file\n", "outProperties = inRaster.profile\n", - "outRaster = outRaster.astype(\"int32\")\n", + "outRaster = inRaster.astype(\"int32\")\n", "outProperties[\"dtype\"] = \"int32\"\n", "with rasterio.open(inputGHSL.replace(\".tif\", \"_LEI.tif\"), \"w\", **outProperties) as out:\n", " out.write(outRaster)" diff --git a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/ECA_Urban_Extents.ipynb b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/ECA_Urban_Extents.ipynb index 8a70db9..43cc23d 100644 --- a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/ECA_Urban_Extents.ipynb +++ b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/ECA_Urban_Extents.ipynb @@ -250,7 +250,7 @@ "for idx, row in final_center.iterrows():\n", " try:\n", " sel_city = 
inCities.loc[inCities.intersects(row[\"geometry\"])]\n", - " except:\n", + " except Exception:\n", " sel_city = inCities.loc[inCities.intersects(row[\"geometry\"].buffer(0))]\n", " if sel_city.shape[0] > 0:\n", " final_center.loc[idx, \"wCity\"] = sel_city[\"city\"].iloc[0]" @@ -278,7 +278,7 @@ "for idx, row in hd_center.iterrows():\n", " try:\n", " sel_city = inCities.loc[inCities.intersects(row[\"geometry\"])]\n", - " except:\n", + " except Exception:\n", " sel_city = inCities.loc[inCities.intersects(row[\"geometry\"].buffer(0))]\n", " if sel_city.shape[0] > 0:\n", " hd_center.loc[idx, \"wCity\"] = sel_city[\"city\"].iloc[0]\n", diff --git a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Combine_All.ipynb b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Combine_All.ipynb index c00cc56..b42b872 100755 --- a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Combine_All.ipynb +++ b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Combine_All.ipynb @@ -5,7 +5,7 @@ "metadata": {}, "source": [ "# Combine All Urban Metrics\n", - "Run the 4 seperate notebooks seperately, and then this notebook last to combine all of the results into one shapefile\n", + "Run the 4 separate notebooks separately, and then this notebook last to combine all of the results into one shapefile\n", "\n", "Check to see that all of the results have the same number of rows" ] diff --git a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Fullness.ipynb b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Fullness.ipynb index 8ce2dc5..72bd255 100755 --- a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Fullness.ipynb +++ b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Fullness.ipynb @@ -138,7 +138,7 @@ "\n", " input_shapes_gpd = gpd.read_file(shpName)\n", "\n", - " # psuedocode\n", + " # pseudocode\n", " # For each Shape:\n", " # Select 
all built-up pixels that are mostly within shape\n", " # Area of shape = sum of all pixels * area of each pixel\n", @@ -197,7 +197,7 @@ " # print(\"print metrics_scalar\")\n", " # print(metrics_scalar)\n", "\n", - " # and concatinate it with the row's shape\n", + " # and concatenate it with the row's shape\n", " new_temp_gdf = pd.concat([temp_gdf.reset_index(drop=True), metrics_df], axis=1)\n", "\n", " # print(\"print new_temp_gdf\")\n", diff --git a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Shape.ipynb b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Shape.ipynb index a5b7fb9..be07601 100755 --- a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Shape.ipynb +++ b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Shape.ipynb @@ -379,7 +379,7 @@ " metrics_scalar[k] = [metrics[k]]\n", " metrics_df = pd.DataFrame(metrics_scalar)\n", "\n", - " # and concatinate it with the row's shape\n", + " # and concatenate it with the row's shape\n", " new_temp_gdf_proj = pd.concat(\n", " [temp_gdf_proj.reset_index(drop=True), metrics_df], axis=1\n", " )\n", diff --git a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Sprawl.ipynb b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Sprawl.ipynb index 8a25579..e0a7696 100755 --- a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Sprawl.ipynb +++ b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Sprawl.ipynb @@ -134,7 +134,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Psuedocode\n", + "# Pseudocode\n", "\n", "# pop_values = []\n", "# For each Shape/FUA:\n", @@ -193,7 +193,7 @@ " # geometry\n", " d[\"geometry\"] = d.apply(lambda row: Point(row[\"x\"], row[\"y\"]), axis=1)\n", "\n", - " # exlude pixels with value less than 77\n", + " # exclude pixels with value less than 77\n", " print(len(d))\n", "\n", " # print(d)\n", @@ -238,7 +238,7 @@ "metadata": {}, 
"outputs": [], "source": [ - "# Psuedocode\n", + "# Pseudocode\n", "\n", "# for each Shape/FUA:\n", "# pixel_count_below_median = 0\n", @@ -288,7 +288,7 @@ "\n", " d = gpd.GeoDataFrame({\"col\": col, \"row\": row, \"val\": val})\n", "\n", - " # exlude pixels with value less than 77\n", + " # exclude pixels with value less than 77\n", " d = d[d.val > 77]\n", " d_count = len(d)\n", " # print(f\"d_count is {d_count}\")\n", @@ -322,7 +322,7 @@ " # print(\"print metrics_scalar\")\n", " # print(metrics_scalar)\n", "\n", - " # and concatinate it with the row's shape\n", + " # and concatenate it with the row's shape\n", " new_temp_gdf = pd.concat([temp_gdf.reset_index(drop=True), metrics_df], axis=1)\n", "\n", " # print(\"print new_temp_gdf\")\n", diff --git a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Structure.ipynb b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Structure.ipynb index 9fde5b1..d2c1ded 100755 --- a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Structure.ipynb +++ b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Structure.ipynb @@ -158,7 +158,7 @@ " # print(\"print metrics_scalar\")\n", " # print(metrics_scalar)\n", "\n", - " # and concatinate it with the row's shape\n", + " # and concatenate it with the row's shape\n", " new_temp_gdf = pd.concat([temp_gdf.reset_index(drop=True), metrics_df], axis=1)\n", "\n", " # print(\"print new_temp_gdf\")\n", @@ -185,7 +185,7 @@ " # metrics_scalar['intersection_density_km'] = 0\n", " # metrics_scalar['street_density_km'] = 0\n", " # metrics_df = pd.DataFrame(metrics_scalar)\n", - " # and concatinate it with the row's shape\n", + " # and concatenate it with the row's shape\n", " # new_temp_gdf = pd.concat([temp_gdf.reset_index(drop=True), metrics_df], axis=1)\n", " # output_new_temp_gdf = output_new_temp_gdf.append(new_temp_gdf, ignore_index=True)\n", " continue" diff --git 
a/notebooks/Implementations/MENA_Benchmarking/NTL_zonal_stats.ipynb b/notebooks/Implementations/MENA_Benchmarking/NTL_zonal_stats.ipynb index f5ab4c6..fe97af5 100644 --- a/notebooks/Implementations/MENA_Benchmarking/NTL_zonal_stats.ipynb +++ b/notebooks/Implementations/MENA_Benchmarking/NTL_zonal_stats.ipynb @@ -99,22 +99,8 @@ } ], "metadata": { - "kernelspec": { - "display_name": "urban_test", - "language": "python", - "name": "python3" - }, "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.2" + "name": "python" } }, "nbformat": 4, diff --git a/notebooks/Implementations/README.md b/notebooks/Implementations/README.md index 2942e71..3931045 100644 --- a/notebooks/Implementations/README.md +++ b/notebooks/Implementations/README.md @@ -1,2 +1,3 @@ # Implementations + The primary role of the GOST team in the World Bank Group is to support operational teams in their exploitation of geospatial data. This happens in many different ways, and the notebooks herein present examples of the specific support the team has provided to various investigations of urbanization. diff --git a/notebooks/Implementations/Slum_Mapping/slumML/README.md b/notebooks/Implementations/Slum_Mapping/slumML/README.md index cdd940b..68f58e9 100644 --- a/notebooks/Implementations/Slum_Mapping/slumML/README.md +++ b/notebooks/Implementations/Slum_Mapping/slumML/README.md @@ -3,6 +3,7 @@ ## 1. slumML main component ### 1.1. Sample data preparation + (1) 5–10 sample areas per area character of interest (e.g., slum, commercial, middle-class resident, etc). If you’d like to cluster your target city into four classes (slum, Commercial, Middle-income neighborhoods, and rich neighborhoods), we need 20–40 sample area data in total. 
If you have a colleague who can map these sample areas in GIS, please provide a shapefile of the sample areas to me. If not, just draw lines on a paper map, scan it as a high-resolution PDF, and send it to me. I will digitize the sample areas. (2) In relation to (1), we need to have local staff who can verify an ML-predicted result to adjust the model. @@ -11,17 +12,21 @@ The below image show examples of the sample areas gathered for the Niamey study: ![Sample areas](https://user-images.githubusercontent.com/64405484/149363357-ac2fe7d9-4aca-4345-88a1-c5d2f9e304a5.png) ### 1.2. Actual slumML analysis - STEP1 and STEP2 + The original codes were developed by Alex Chunet. Perform STEP 1 to generate morphological indices of the target building footprint layer. Then run STEP 2 to perform an automated supervised ML analysis. ## 2. Auxiliary materials + ### 2.1. GOB2FC (Google Open Building layer to ESRI feature class) + This script is to convert the CSV file(s) of [Google Open Buildings](https://sites.research.google/open-buildings/) to a Feature Class. ![Layout2](https://user-images.githubusercontent.com/64405484/137813865-0abd8f0c-ff15-4980-9251-042a8f9dc66b.png) An example map rendered in ArcGIS Pro (Cairo Metropolitan Area) #### 2.2.1. How to use + This script uses ArcPy modules, so your environment should have ArcPy library. Simply, load the script to your Tool Box on your ArcGIS Pro project and run it. The target Open Buildings CSV file must be stored in the folder where the script is located. If you want to test the script with a small chunk of a full CSV, modify the code: @@ -32,6 +37,7 @@ df_test.reset_index(inplace=True, drop=True) for i, r in df_conf.iterrows(): ``` + For example, iloc[0:100] will retirieve the first 100 records from the original data. df_conf should be replaced by df_test. #### 2.2.2. 
NOTE diff --git a/notebooks/Implementations/Slum_Mapping/slumML/STEP1.ipynb b/notebooks/Implementations/Slum_Mapping/slumML/STEP1.ipynb index b5e6081..8bed066 100644 --- a/notebooks/Implementations/Slum_Mapping/slumML/STEP1.ipynb +++ b/notebooks/Implementations/Slum_Mapping/slumML/STEP1.ipynb @@ -106,7 +106,9 @@ "# Prepare the original shape file\n", "original = gpd.read_file(f) # Read ESEI shapefile\n", "if original.crs != WGS:\n", - " original = original.to_crs(WGS) # Convert the spatial referenct to WGS if it is not\n", + " original = original.to_crs(\n", + " WGS\n", + " ) # Convert the spatial referenced to WGS if it is not\n", "\n", "original[\"PID\"] = original.index + 1\n", "\n", @@ -115,7 +117,7 @@ "fil = original.copy()\n", "\n", "fil = fil.to_crs(UTM) # Convert the spatial reference to UTM\n", - "# Adding attributes to the shapefile: area, geomerty, and PID (unique IDs)\n", + "# Adding attributes to the shapefile: area, geometry, and PID (unique IDs)\n", "fil[\"area\"] = fil.area\n", "fil[\"centroid\"] = fil[\"geometry\"].centroid\n", "\n", @@ -151,9 +153,9 @@ "def Main(passed_dict):\n", " # unpack passed dict into local variables for this thread.\n", " short = passed_dict[\"df\"]\n", - " thread_no = passed_dict[\"thread_no\"]\n", + " # thread_no = passed_dict[\"thread_no\"]\n", " print_thresh = passed_dict[\"print_thresh\"]\n", - " save_thresh = passed_dict[\"save_thresh\"]\n", + " # save_thresh = passed_dict[\"save_thresh\"]\n", "\n", " # set up some counters / timings\n", " t = time.time()\n", @@ -262,8 +264,8 @@ " print(\"%s rows completed at %s\" % (counter, time.ctime()))\n", "\n", " \"\"\"\n", - " # this functionality saves progress in case the process cannot be finished in one sitting. \n", - " # ideally, finish the processing in one sitting. 
\n", + " # this functionality saves progress in case the process cannot be finished in one sitting.\n", + " # ideally, finish the processing in one sitting.\n", " old = 0\n", " if counter % save_thresh == 0:\n", " saver = pd.DataFrame(bundle)\n", diff --git a/notebooks/Implementations/Slum_Mapping/slumML/STEP1.py b/notebooks/Implementations/Slum_Mapping/slumML/STEP1.py index d93f560..dd8c81b 100644 --- a/notebooks/Implementations/Slum_Mapping/slumML/STEP1.py +++ b/notebooks/Implementations/Slum_Mapping/slumML/STEP1.py @@ -47,11 +47,33 @@ ### def Main(passed_dict): + """ + This function will calculate the nearest neighbours and their attributes for each building in the passed DataFrame. + + Parameters + ---------- + passed_dict : dict + a dictionary containing the following elements: + df : DataFrame + a DataFrame of building footprints, with a unique identifier, a centroid, and an area + thread_no : int + an integer representing the thread number + print_thresh : int + an integer representing the number of rows to process before printing progress + save_thresh : int + an integer representing the number of rows to process before saving progress + + Returns + ------- + list + a list of dictionaries, each containing the attributes of the nearest neighbours for each building in the passed DataFrame + + """ # unpack passed dict into local variables for this thread. 
short = passed_dict["df"] - thread_no = passed_dict["thread_no"] + # thread_no = passed_dict["thread_no"] print_thresh = passed_dict["print_thresh"] - save_thresh = passed_dict["save_thresh"] + # save_thresh = passed_dict["save_thresh"] # set up some counters / timings t = time.time() diff --git a/notebooks/Implementations/Slum_Mapping/slumML/STEP2.ipynb b/notebooks/Implementations/Slum_Mapping/slumML/STEP2.ipynb index 359fbb4..24b7226 100644 --- a/notebooks/Implementations/Slum_Mapping/slumML/STEP2.ipynb +++ b/notebooks/Implementations/Slum_Mapping/slumML/STEP2.ipynb @@ -98,7 +98,7 @@ "metadata": {}, "outputs": [], "source": [ - "pth = \"/content/drive/MyDrive/Colab Notebooks/slumML/data/Yaounde/\" # Directory to save model, ouputs\n", + "pth = \"/content/drive/MyDrive/Colab Notebooks/slumML/data/Yaounde/\" # Directory to save model, outputs\n", "building_file = \"/content/drive/MyDrive/Colab Notebooks/slumML/data/Yaounde/Yaounde_DA_morphology.shp\" # Specify the processed building footprint data\n", "sample_file = \"/content/drive/MyDrive/Colab Notebooks/slumML/data/Yaounde/Yaounde_sample_data.shp\" # Specify the sample data" ] diff --git a/notebooks/Implementations/Slum_Mapping/slumML/STEP2.py b/notebooks/Implementations/Slum_Mapping/slumML/STEP2.py index 96ea775..4285aaa 100644 --- a/notebooks/Implementations/Slum_Mapping/slumML/STEP2.py +++ b/notebooks/Implementations/Slum_Mapping/slumML/STEP2.py @@ -124,6 +124,20 @@ def MLpred(df): + """ + Perform machine learning prediction on the input dataframe. + + Parameters + ---------- + df : pandas.DataFrame + The input dataframe containing the data for prediction. + + Returns + ------- + pandas.DataFrame + The dataframe with the predicted values merged. 
+ + """ df_input = df[predictors] # Extract predictor cols only (specified by the 'predictors' LIST) hf_temp = H2OFrame(df_input) @@ -135,9 +149,9 @@ def MLpred(df): df.reset_index(inplace=True) pred_df_temp["PID"] = df.PID - ans = pd.merge(df, pred_df_temp, on="PID") + ans_var = pd.merge(df, pred_df_temp, on="PID") - return ans + return ans_var # Create an empty DF for append diff --git a/notebooks/Implementations/URB_DECAT_B_ExploringGHSSMODcode.ipynb b/notebooks/Implementations/URB_DECAT_B_ExploringGHSSMODcode.ipynb index 50e73e6..8a9b779 100644 --- a/notebooks/Implementations/URB_DECAT_B_ExploringGHSSMODcode.ipynb +++ b/notebooks/Implementations/URB_DECAT_B_ExploringGHSSMODcode.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "source": [ "# Exploring DEGURBA\n", - "The Degree of Urbanization methodology developed by the European Commission provides a consistent definition of urban through application of population density and total population theresholds to gridded population data." + "The Degree of Urbanization methodology developed by the European Commission provides a consistent definition of urban through application of population density and total population thresholds to gridded population data." 
] }, { @@ -253,7 +253,7 @@ " xx = shape(cShape)\n", " xx = Polygon(xx.exterior)\n", " cShape = xx.__geo_interface__\n", - " # If the shape is urban, claculate total pop\n", + " # If the shape is urban, calculate total pop\n", " mask = rasterize(\n", " [(cShape, 0)],\n", " out_shape=data[0, :, :].shape,\n", diff --git a/notebooks/Implementations/URB_LKA_CCDR_Support/LKA_Urban_Comp.ipynb b/notebooks/Implementations/URB_LKA_CCDR_Support/LKA_Urban_Comp.ipynb index 9c29dab..9a35e4b 100644 --- a/notebooks/Implementations/URB_LKA_CCDR_Support/LKA_Urban_Comp.ipynb +++ b/notebooks/Implementations/URB_LKA_CCDR_Support/LKA_Urban_Comp.ipynb @@ -30,7 +30,8 @@ "import pandas as pd\n", "import geopandas as gpd\n", "\n", - "import GOSTurban.UrbanRaster as urban" + "import GOSTurban.UrbanRaster as urban\n", + "import GOSTurban" ] }, { @@ -132,22 +133,8 @@ } ], "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.7" + "name": "python" } }, "nbformat": 4, diff --git a/notebooks/Implementations/URB_LKA_CCDR_Support/README.md b/notebooks/Implementations/URB_LKA_CCDR_Support/README.md index 3d58bb8..fa5c7bf 100644 --- a/notebooks/Implementations/URB_LKA_CCDR_Support/README.md +++ b/notebooks/Implementations/URB_LKA_CCDR_Support/README.md @@ -1,4 +1,5 @@ # Urbanization for CCDR + The Sri Lanka CCDR team is looking for support in urban deep dives on 9 provincial capitals. 
The following analysis will be completed: | Analysis | Phase | Notebook | Plan | diff --git a/notebooks/Implementations/URB_SEAU1_B_A_Ka_ExtractDataUrban.ipynb b/notebooks/Implementations/URB_SEAU1_B_A_Ka_ExtractDataUrban.ipynb index 7dcee24..1bb76cc 100644 --- a/notebooks/Implementations/URB_SEAU1_B_A_Ka_ExtractDataUrban.ipynb +++ b/notebooks/Implementations/URB_SEAU1_B_A_Ka_ExtractDataUrban.ipynb @@ -101,8 +101,8 @@ " final_folder=\"FINAL_STANDARD_1KM\",\n", " ghspop_suffix=\"1k\",\n", " )\n", - " adm2_res = os.path.join(xx.final_folder, \"URBAN_ADMIN2_STATS_COMPILED.csv\")\n", - " ea_res = os.path.join(xx.final_folder, \"URBAN_COMMUNE_STATS_COMPILED.csv\")\n", + " # adm2_res = os.path.join(xx.final_folder, \"URBAN_ADMIN2_STATS_COMPILED.csv\")\n", + " # ea_res = os.path.join(xx.final_folder, \"URBAN_COMMUNE_STATS_COMPILED.csv\")\n", " tPrint(\"***** Extracting Global Layers %s\" % iso3)\n", " xx.extract_layers(\n", " global_landcover,\n", diff --git a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/Create_Mosaick_Datasets.ipynb b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/Create_Mosaick_Datasets.ipynb index 606b965..bce2a76 100644 --- a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/Create_Mosaick_Datasets.ipynb +++ b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/Create_Mosaick_Datasets.ipynb @@ -77,7 +77,7 @@ " cur_path = os.path.join(\"s3://\", bucket, res[\"Key\"])\n", " try:\n", " urban_tiff[cur_lyr].append(cur_path)\n", - " except:\n", + " except Exception:\n", " urban_tiff[cur_lyr] = [cur_path]" ] }, @@ -170,7 +170,7 @@ " cur_label = f\"{cur_pop}_{cur_type}\"\n", " try:\n", " db_tiffs[cur_label].append(cur_path)\n", - " except:\n", + " except Exception:\n", " db_tiffs[cur_label] = [cur_path]" ] }, diff --git a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/MAP_Urbanization.ipynb b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/MAP_Urbanization.ipynb index 0157813..5edfcee 100644 --- 
a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/MAP_Urbanization.ipynb +++ b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/MAP_Urbanization.ipynb @@ -174,7 +174,7 @@ " try:\n", " comboRes = urb.generate_combo_layer(pop_type=[dou_pop, db_pop])\n", " res = urb.jaccard_index(pop_type=[dou_pop, db_pop])\n", - " except:\n", + " except Exception:\n", " res = {\"urb_jaccard\": -1, \"hd_jaccard\": -1}\n", " else:\n", " comboRes = urb.generate_combo_layer(pop_type=[dou_pop, db_pop])\n", diff --git a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/NGA_specific_results.ipynb b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/NGA_specific_results.ipynb index dcd624a..fa4bb44 100644 --- a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/NGA_specific_results.ipynb +++ b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/NGA_specific_results.ipynb @@ -37,7 +37,8 @@ "\n", "# Import GOST urban functions\n", "sys.path.append(\"../../../src\")\n", - "import GOST_Urban.urban_helper as helper\n", + "import GOSTurban\n", + "import GOSTurban.urban_helper as helper\n", "\n", "# Import local functions\n", "from novelUrbanization import *\n", @@ -273,7 +274,7 @@ "source": [ "final_res = adm1_bounds.copy()\n", "for pop_layer in pop_files:\n", - " # zonal stats on DOU filess\n", + " # zonal stats on DOU files\n", " pop_name = os.path.basename(pop_layer)[:-4]\n", " dou_urban_file = os.path.join(urban_folder, f\"{pop_name}_urban.tif\")\n", " dou_hd_urban_file = os.path.join(urban_folder, f\"{pop_name}_urban_hd.tif\")\n", diff --git a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/README.md b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/README.md index dcefbac..fd915f1 100644 --- a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/README.md +++ b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/README.md @@ -1,7 +1,9 @@ # Novel Urbanization + The code herein support the extraction of the data and calculation of urban extents for the 
World Bank's experiment and investigation of two methods for evaluating urban: ## [Degree of Urbanization](https://ghsl.jrc.ec.europa.eu/degurbaOverview.php) + The European Commission developed a globally consistent, people-centric definition of urban areas. The basic approach is to apply a threshold to population grids on both the minimum population density, and then on the minimum total population of the resulting settlements. While the team at the EC continues to advance and iterate on their methodology, we rely on the original definitions of urban they produced: | Urban area | Min Pop Density | Min Settlement Pop | @@ -12,11 +14,12 @@ The European Commission developed a globally consistent, people-centric definiti ## [Bellefon (2021)](https://www.sciencedirect.com/science/article/pii/S0094119019301032) This method eschews the absolute density thresholds of the EC methodology and instead compares actual building density to counterfactual density after random redistribution of the pixel values in its neighbourhood. The methodology is applied following these steps: -1. Computing a smoothed density of the variable on which the delineation is based (building density or population). -2. Randomly redistributing pixel values (3000 iterations) the variable values at the pixel level across all livable areas such that obtaining, for each pixel, a counterfactual distribution of the variable considered. -3. Classifying as urban those pixels for which the actual density is above the 95th percentile of the smoothed counterfactual distribution. -4. Grouping contiguous urban pixels to form urban areas -5. Then the random redistribution of pixels’ values is repeated but within these Urban areas only, which gives a new counterfactual distribution. Contiguous pixels of urban areas for which actual density is above the 95th percentile of this within urban area new counterfactual distribution are classified as Cores. Urban areas that have at least one core are classified as Cities. 
+ +1. Computing a smoothed density of the variable on which the delineation is based (building density or population). +2. Randomly redistributing pixel values (3000 iterations) the variable values at the pixel level across all livable areas such that obtaining, for each pixel, a counterfactual distribution of the variable considered. +3. Classifying as urban those pixels for which the actual density is above the 95th percentile of the smoothed counterfactual distribution. +4. Grouping contiguous urban pixels to form urban areas +5. Then the random redistribution of pixels’ values is repeated but within these Urban areas only, which gives a new counterfactual distribution. Contiguous pixels of urban areas for which actual density is above the 95th percentile of this within urban area new counterfactual distribution are classified as Cores. Urban areas that have at least one core are classified as Cities. | Urban area | label | Definition | | --- | --- | --- | diff --git a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/novelUrbanization.py b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/novelUrbanization.py index 0d59808..1055280 100755 --- a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/novelUrbanization.py +++ b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/novelUrbanization.py @@ -24,7 +24,23 @@ class urban_data(object): def __init__(self, iso3, base_folder, aapc_folder): - """Summarize completed urbanization layers; combine into single output""" + """ + Summarize completed urbanization layers; combine into single output + + Parameters + ---------- + iso3 : string + 3 letter country code + base_folder : string + location of dartboard urbanization + aapc_folder : string + location of aapc urbanization + + Returns + ------- + None + + """ self.iso3 = iso3 self.in_folder = base_folder self.aapc_folder = aapc_folder @@ -33,11 +49,16 @@ def __init__(self, iso3, base_folder, aapc_folder): def get_urban_layers(self): """get a list of all 
urban deleniations - INPUT - aapc_folder [string] - folder containing dartboard deleniations + Parameters + ---------- + aapc_folder : string + folder containing dartboard deleniations + + Returns + ------- + list + list of dartboard urban deleniations - RETURNS - [list of strings] """ db_urban_files = [] for root, dirs, files in os.walk(self.in_folder): @@ -64,8 +85,30 @@ def jaccard_index( db_urb="_ur.tif", db_hd="_co.tif", ): - """Calculate the Jaccard index comparing urban and then hd urban layers + """ + Calculate the Jaccard index comparing urban and then hd urban layers https://www.statisticshowto.com/jaccard-index/ + + Parameters + ---------- + pop_type : string or tuple of strings + type of population data to compare + res : string + resolution of data to compare + dou_urb : string + string to identify urban layer from aapc + dou_hd : string + string to identify high density urban layer from aapc + db_urb : string + string to identify urban layer from dartboard + db_hd : string + string to identify high density urban layer from dartboard + + Returns + ------- + dict + dictionary containing the jaccard index for urban and hd urban layers + """ if pop_type.__class__ == str: sel_rasters = self.get_rasters(pop_type, pop_type, res) @@ -94,6 +137,22 @@ def jaccard_index( db_hd_d = (db_hd_d > 0) * 1 def calculate_jaccard(inD1, inD2): + """ + Calculate the Jaccard index comparing urban and then hd urban layers + + Parameters + ---------- + inD1 : array + array of urban data + inD2 : array + array of urban data + + Returns + ------- + float + jaccard index + + """ # Calculate urban jaccard jaccardD = inD1 + inD2 xx = np.unique(jaccardD, return_counts=True) @@ -108,7 +167,24 @@ def calculate_jaccard(inD1, inD2): return {"urb_jaccard": urb_jaccard, "hd_jaccard": hd_jaccard} def get_rasters(self, pop_type_dou="gpo", pop_type_db="gpo", res=""): - """filter rasters based on pop_type and resolution""" + """ + Filter rasters based on pop_type and resolution + + Parameters 
+ ---------- + pop_type_dou : string + type of population data from aapc + pop_type_db : string + type of population data from dartboard + res : string + resolution of data to compare + + Returns + ------- + list + list of rasters that match the input parameters + + """ sel_rasters = [] for f in self.dou_urban_files: if pop_type_dou in f: @@ -130,8 +206,20 @@ def get_rasters(self, pop_type_dou="gpo", pop_type_db="gpo", res=""): def generate_combo_layer(self, pop_type="gpo", res="", debug=False): """open urban rasters and combine into a single dataset - INPUT - pop_type [string or tuple of strings] + Parameters + ---------- + pop_type : string or tuple of strings, optional + type of population data to compare + res : string, optional + resolution of data to compare + debug : bool, optional + print out the selected rasters + + Returns + ------- + dict + dictionary containing the combined urban and hd urban layers + """ if pop_type.__class__ == str: sel_rasters = self.get_rasters(pop_type, pop_type, res) @@ -143,7 +231,7 @@ def generate_combo_layer(self, pop_type="gpo", res="", debug=False): print(p) if len(sel_rasters) > 0: - # Open all the ratser files and covert to pixel-level summary numbers + # Open all the ratser files and convert to pixel-level summary numbers idx = 0 for cur_raster in sel_rasters: curR = rasterio.open(cur_raster) @@ -168,11 +256,22 @@ def generate_combo_layer(self, pop_type="gpo", res="", debug=False): def write_results(self, res, out_folder, reclass_bin=True, dbhd="co"): """Write the results from the function generate_combo_layer to file - INPUT - res [dictionary] - results from function generate_combo_layer - out_folder [string] - path to directory to create output tif files - [optional] reclass_bin [boolean: default True] - reclassify the binary map product into - 4 classes: agree urban, agree rural, disagree on urban class, disagree on rurality + Parameters + ---------- + res : dictionary + results from function generate_combo_layer + 
out_folder : string + path to directory to create output tif files + reclass_bin :boolean, optional + default True, reclassify the binary map product into + 4 classes: agree urban, agree rural, disagree on urban class, disagree on rurality + dbhd : string, optional + default "co", type of population data from dartboard + + Returns + ------- + None + """ out_sum_file = os.path.join(out_folder, f"{self.iso3}_urban_sum_{dbhd}.tif") out_bin_file = os.path.join(out_folder, f"{self.iso3}_urban_binary_{dbhd}.tif") @@ -240,6 +339,37 @@ def calculate_urban( include_ghsl_h20=True, evaluate=False, ): + """ + Calculate urbanization for a country + + Parameters + ---------- + iso3 : string + 3 letter country code + inG : geopandas dataframe + polygons to summarize urbanization + inG2 : geopandas dataframe + polygons to summarize urbanization + pop_files : list + list of population files + ea_file : string + location of admin boundary file + output_folder : string + location to save output files + km : boolean, optional + default True, process 1km data + small : boolean, optional + default True, process 250m data + include_ghsl_h20 : boolean, optional + default True, include ghsl water layer + evaluate : boolean, optional + default False, evaluate the output + + Returns + ------- + None + + """ global_landcover = "/home/public/Data/GLOBAL/LANDCOVER/GLOBCOVER/2015/ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015-v2.0.7.tif" global_ghspop = "/home/public/Data/GLOBAL/Population/GHS/250/GHS_POP_E2015_GLOBE_R2019A_54009_250_V1_0.tif" global_ghspop_1k = "/home/public/Data/GLOBAL/Population/GHS/GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0.tif" @@ -278,8 +408,8 @@ def calculate_urban( final_folder="FINAL_STANDARD_1KM", ghspop_suffix="1k", ) - adm2_res = os.path.join(xx.final_folder, "URBAN_ADMIN2_STATS_COMPILED.csv") - ea_res = os.path.join(xx.final_folder, "URBAN_COMMUNE_STATS_COMPILED.csv") + # adm2_res = os.path.join(xx.final_folder, "URBAN_ADMIN2_STATS_COMPILED.csv") + # ea_res = 
os.path.join(xx.final_folder, "URBAN_COMMUNE_STATS_COMPILED.csv") tPrint(f"{iso3} ***1k Extracting Global Layers") xx.extract_layers( global_landcover, @@ -343,12 +473,24 @@ def calculate_urban( def calc_pp_urban(in_folder, default_pop_file, admin_layer, output_folder, iso3=""): """Summarize urbanization from Pierre-Philippe's Dartboard methodology - INPUT - input_folder [string path] - location of dartboard urbanization - default_pop_file [string path] - default pop filename to use for urban population calculations - admin_layer [string path] - zones used to summarize population - RETURN - [geopandas dataframe] - contains total population and urban population for each shape + Parameters + ---------- + in_folder : string path + location of dartboard urbanization + default_pop_file : string path + default pop filename to use for urban population calculations + admin_layer : string path + zones used to summarize population + output_folder : string path + location to save output files + iso3 : string, optional + default "", filter to only process a specific country + + Returns + ------- + geopandas dataframe + contains total population and urban population for each shape + """ urban_layers = [ os.path.join(in_folder, x) for x in os.listdir(in_folder) if x[-4:] == ".tif" ] @@ -358,9 +500,9 @@ def calc_pp_urban(in_folder, default_pop_file, admin_layer, output_folder, iso3= cur_layer = urban_layers[0] inD = gpd.read_file(admin_layer) - default_pop_1k = default_pop_file.replace( - default_pop_file[:3], "%s1k" % default_pop_file[:3] - ) + # default_pop_1k = default_pop_file.replace( + # default_pop_file[:3], "%s1k" % default_pop_file[:3] + # ) for cur_layer in urban_layers: # tPrint(cur_layer) # Open and read in urban data @@ -369,10 +511,10 @@ def calc_pp_urban(in_folder, default_pop_file, admin_layer, output_folder, iso3= urban_data = (urban_data > 0).astype(urban_r.meta["dtype"]) # Extract population data urban_layer = os.path.basename(cur_layer) - default_pop = 
default_pop_file + # default_pop = default_pop_file if "1k" in urban_layer: - default_pop = default_pop_1k + # default_pop = default_pop_1k pop_layer = os.path.basename(cur_layer)[:11] pop_folder = os.path.join(output_folder, "FINAL_STANDARD_1KM") else: @@ -382,7 +524,7 @@ def calc_pp_urban(in_folder, default_pop_file, admin_layer, output_folder, iso3= if not os.path.exists(pop_file): if "1k" in urban_layer: - default_pop = default_pop_1k + # default_pop = default_pop_1k pop_layer = os.path.basename(cur_layer)[:9] pop_folder = os.path.join(output_folder, "FINAL_STANDARD_1KM") else: @@ -413,7 +555,19 @@ def calc_pp_urban(in_folder, default_pop_file, admin_layer, output_folder, iso3= def check_no_data(in_folder): - """loop through all the tif files in the FINAL folders and calculate the number of no-data cells""" + """ + Loop through all the tif files in the FINAL folders and calculate the number of no-data cells + + Parameters + ---------- + in_folder : string path + location of dartboard urbanization + + Returns + ------- + None + + """ for root, dirs, files in os.walk(in_folder): if "FINAL" in root: for f in files: @@ -426,7 +580,23 @@ def check_no_data(in_folder): def pp_point_urban_summaries(inD, urban_tiffs, out_file): - """summarize urbanization for point locations (inD) for each urban definition file (urban_tiffs)""" + """ + Summarize urbanization for point locations (inD) for each urban definition file (urban_tiffs) + + Parameters + ---------- + inD : geopandas dataframe + point locations to summarize urbanization + urban_tiffs : list + list of urban definition files + out_file : string path + location to save output file + + Returns + ------- + None + + """ for pFile in urban_tiffs: if pFile.endswith(".tif"): try: @@ -441,13 +611,29 @@ def pp_point_urban_summaries(inD, urban_tiffs, out_file): inD[os.path.basename(pFile).replace(".tif", "")] = [ x[0] for x in list(urb_res) ] - except: + except Exception: pass pd.DataFrame(inD.drop(["geometry"], 
axis=1)).to_csv(out_file) def point_urban_summaries(inD, pop_tiffs, out_file): - """summarize urbanization for point locations (inD) for each population file (pop_tiffs)""" + """ + Summarize urbanization for point locations (inD) for each population file (pop_tiffs) + + Parameters + ---------- + inD : geopandas dataframe + point locations to summarize urbanization + pop_tiffs : list + list of population files + out_file : string path + location to save output file + + Returns + ------- + None + + """ for pFile in pop_tiffs: urb_file = pFile.replace(".tif", "_urban.tif") hd_file = pFile.replace(".tif", "_urban_hd.tif") @@ -465,19 +651,42 @@ def point_urban_summaries(inD, pop_tiffs, out_file): inD[os.path.basename(curFile).replace(".tif", "")] = [ x[0] for x in list(urb_res) ] - except: + except Exception: pass pd.DataFrame(inD.drop(["geometry"], axis=1)).to_csv(out_file) def run_country(iso3): - local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format( - country=iso3 - ) + # local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format( + # country=iso3 + # ) + pass def run_zonal(iso3, output_folder, inG, pop_files, ea_file, pt_file): - """Summarize zonal statistics for urbanization numbers against polygons and points for both WB and PP urban calculations""" + """ + Summarize zonal statistics for urbanization numbers against polygons and points for both WB and PP urban calculations + + Parameters + ---------- + iso3 : string + 3 letter country code + output_folder : string + location to save output files + inG : geopandas dataframe + polygons to summarize urbanization + pop_files : list + list of population files + ea_file : string + location of admin boundary file + pt_file : string + location of point file + + Returns + ------- + None + + """ tPrint(f"Starting zonal calculations {iso3}") pp_deleniations_folder = ( "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/AAPPC/Delineations" @@ -665,7 +874,7 @@ def 
run_zonal(iso3, output_folder, inG, pop_files, ea_file, pt_file): cur_def = EA_DEFS[iso3] ea_file = os.path.join(cur_def[0], cur_def[1]) pt_file = os.path.join(cur_def[0], cur_def[2]) - except: + except Exception: ea_file = "" pt_file = "" diff --git a/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/README.md b/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/README.md index 5d9d788..9d13f8d 100644 --- a/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/README.md +++ b/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/README.md @@ -1,2 +1,3 @@ # Just energy transition + tbd diff --git a/notebooks/Implementations/WSF/wsfdata.py b/notebooks/Implementations/WSF/wsfdata.py index 968f0dc..1923fe8 100755 --- a/notebooks/Implementations/WSF/wsfdata.py +++ b/notebooks/Implementations/WSF/wsfdata.py @@ -7,8 +7,15 @@ class wsf_dataset(object): def __init__(self, imageList): """Create object organizning and analyzing WSF data. - INPUT - imageList [array of strings] - list of paths to input images + Parameters + ---------- + imageList : array of strings + list of paths to input images + + Returns + ------- + None + """ for img in imageList: if "AW3D30.tif" in img: @@ -23,13 +30,20 @@ def __init__(self, imageList): def analyze_idc(self, outFile="", badThreshold=2): """Analyze the IDC (quality) image - INPUT - [optional] outfile [string] - path to create output file describing quality - [optional] badThreshold [number] - values above this will be considered low quality - RETURNS - [numpy array] - 2band np array of same size as the input IDC image. - Band 1 - Total number of years with bad data - Band 2 - Most recent year of bad data + Parameters + ---------- + outfile : string, optional + path to create output file describing quality + badThreshold : float, optional + values above this will be considered low quality + + Returns + ------- + numpy array + 2band np array of same size as the input IDC image. 
+ Band 1 - Total number of years with bad data + Band 2 - Most recent year of bad data + """ idc = rasterio.open(self.evolution_idc) idcD = idc.read() @@ -41,7 +55,7 @@ def analyze_idc(self, outFile="", badThreshold=2): notGood = curD > badThreshold try: newestBadYear = max([i for i, x in enumerate(notGood) if x]) - except: + except Exception: newestBadYear = 0 outArray[0, cIdx, rIdx] = notGood.sum() outArray[1, cIdx, rIdx] = newestBadYear @@ -63,17 +77,24 @@ def correct_evolution_idc(self, outfile="", badThreshold=2): the WSF built date if the quality flag is worse than badThreshold. If it is worse, the cell is assigned the next date in the WSF quality flag that is of acceptable quality. - INPUT - [optional] outfile [string] - path to create output file with corrected evolution dataset - [optional] badThreshold [number] - values above this will be considered low quality - RETURNS - [numpy array] - np array of same size as the input evolution image. + Parameters + ---------- + outfile : string, optional + path to create output file with corrected evolution dataset + badThreshold : float, optional + values above this will be considered low quality + + Returns + ------- + numpy array + np array of same size as the input evolution image. 
+ """ inEvolution = rasterio.open(self.evolution) inIDC = rasterio.open(self.evolution_idc) inE = inEvolution.read() inD = inIDC.read() - outArray = np.zeros(inE.shape) + # outArray = np.zeros(inE.shape) for rIdx in range(0, inE.shape[2]): for cIdx in range(0, inE.shape[1]): curE = inE[0, cIdx, rIdx] @@ -97,24 +118,29 @@ def correct_evolution_idc(self, outfile="", badThreshold=2): def generate_evolution_plot(self, dataset="normal"): """generate a dataframe for matplotlib plotting - INPUT - [optional] dataset [pandas dataframe] - provide a dataset to analyze, - if you don't want to read in the evolution dataset - - RETURNS - [geopandas dataframe] - - EXAMPLE - wsfD = wsfdata.wsf_dataset(images_list) - basePlot = wsfD.generate_evolution_plot() - # generate corrected data - correctedRes = wsfD.correct_evolution_idc(badThreshold=3) - correctedPlot = wsfD.generate_evolution_plot(dataset=correctedRes) - basePlot['corrected'] = correctedPlot['cumBuilt'] + Parameters + ---------- + dataset : pandas dataframe, optional + provide a dataset to analyze, if you don't want to read in the evolution dataset + + Returns + ------- + geopandas dataframe + dataframe with columns for built and cumBuilt + + Examples + -------- + >>> wsfD = wsfdata.wsf_dataset(images_list) + >>> basePlot = wsfD.generate_evolution_plot() + >>> # generate corrected data + >>> correctedRes = wsfD.correct_evolution_idc(badThreshold=3) + >>> correctedPlot = wsfD.generate_evolution_plot(dataset=correctedRes) + >>> basePlot['corrected'] = correctedPlot['cumBuilt'] + >>> + >>> basePlot.drop('built', axis=1).plot() + >>> + >>> basePlot['cumBuilt'].plot() - basePlot.drop('built', axis=1).plot() - - basePlot['cumBuilt'].plot() """ if dataset == "normal": evolution = rasterio.open(self.evolution) @@ -135,13 +161,18 @@ def summarize_idc(self, thresh): """Summarize IDC by measuring what percentage of the built cells in every year are above the defined quality threshold - INPUT - thresh [number] - value from 1-6 
defining the acceptable quality threshold, every value + Parameters + ---------- + thresh : float + value from 1-6 defining the acceptable quality threshold, every value below or equal to that threshold (better than that value) are considered acceptable - RETURNS - [numpy array] - fraction of built cells that are of acceptable quality per year. HOPEFULLY + Returns + ------- + numpy array + fraction of built cells that are of acceptable quality per year. HOPEFULLY the reutrning array should be 31 records long + """ idc = rasterio.open(self.evolution_idc).read() evolution = rasterio.open(self.evolution).read() diff --git a/notebooks/Implementations/WSF_DECAT_B_ExploringIDCerrors.ipynb b/notebooks/Implementations/WSF_DECAT_B_ExploringIDCerrors.ipynb index b9c9d06..5db21ac 100755 --- a/notebooks/Implementations/WSF_DECAT_B_ExploringIDCerrors.ipynb +++ b/notebooks/Implementations/WSF_DECAT_B_ExploringIDCerrors.ipynb @@ -39,7 +39,11 @@ "import pandas as pd\n", "import geopandas as gpd\n", "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import GOSTRocks.rasterMisc as rMisc\n", "\n", + "#reference the module in the WSF/ folder\n", + "from WSF import wsfdata\n", "# Get reference to GOSTRocks\n", "sys.path.append(\"../../../gostrocks/src\")\n", "sys.path.append(\"../../\")\n", diff --git a/notebooks/Replications/URB_NovelUrbanization.py b/notebooks/Replications/URB_NovelUrbanization.py index 4897bf6..6716b3b 100644 --- a/notebooks/Replications/URB_NovelUrbanization.py +++ b/notebooks/Replications/URB_NovelUrbanization.py @@ -9,6 +9,22 @@ def download_pop_file(url, filename): + """ + Download a file from a url to a local file + + Parameters + ---------- + url : string + URL to download from + + filename : string + Local file to save to + + Returns + ------- + None + + """ # Open the url r = requests.get(url) # Set decode_content value to True, otherwise the downloaded image file's size will be zero. 
@@ -19,6 +35,22 @@ def download_pop_file(url, filename): def main(iso3, out_folder): + """ + Download population data and calculate urban extents + + Parameters + ---------- + iso3 : string + ISO3 code for country + + out_folder : string + Folder to save data to + + Returns + ------- + None + + """ # download the population data wp_url = f"https://data.worldpop.org/GIS/Population/Global_2000_2020_1km/2020/{iso3.upper()}/{iso3.lower()}_ppp_2020_1km_Aggregated.tif" print(wp_url) @@ -31,7 +63,7 @@ def main(iso3, out_folder): try: if not os.path.exists(out_file): download_pop_file(wp_url, out_file) - except: + except Exception: print(f"Could not download national population data for {iso3} from {wp_url}") print( "If you can manually download to the defined out_folder, the script will run" diff --git a/notebooks/Tutorials/LEI_Example.ipynb b/notebooks/Tutorials/LEI_Example.ipynb index 9a9155a..7509179 100644 --- a/notebooks/Tutorials/LEI_Example.ipynb +++ b/notebooks/Tutorials/LEI_Example.ipynb @@ -147,7 +147,7 @@ "lei_90_00[\"area\"] = lei_90_00[\"geometry\"].apply(lambda x: x.area)\n", "\n", "\n", - "def calculate_LEI(val, leap_val, exp_val):\n", + "def calculate_LEI_func(val, leap_val, exp_val):\n", " if val <= leap_val:\n", " return 3\n", " elif val < exp_val:\n", @@ -157,7 +157,7 @@ "\n", "\n", "lei_90_00[\"class\"] = lei_90_00[\"LEI\"].apply(\n", - " lambda x: calculate_LEI(x, leap_val, exp_val)\n", + " lambda x: calculate_LEI_func(x, leap_val, exp_val)\n", ")\n", "mapMisc.static_map_vector(\n", " lei_90_00, \"class\", edgecolor=\"match\", colormap=\"Dark2\"\n", @@ -218,7 +218,7 @@ "\n", "\n", "lei_90_00[\"class\"] = lei_90_00[\"LEI\"].apply(\n", - " lambda x: calculate_LEI(x, leap_val, exp_val)\n", + " lambda x: calculate_LEI_func(x, leap_val, exp_val)\n", ")\n", "mapMisc.static_map_vector(\n", " lei_90_00, \"class\", edgecolor=\"match\", colormap=\"Dark2\"\n", diff --git a/pyproject.toml b/pyproject.toml index 10e49ee..ad249ba 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -58,9 +58,29 @@ source = "vcs" version-file = "src/GOSTurban/_version.py" [tool.codespell] -skip = 'docs/_build,docs/references.bib,__pycache__,*.png,*.gz,*.whl' +skip = 'docs/_build,docs/references.bib,__pycache__,*.png,*.gz,*.whl,pyproject.toml,*csv,notebooks/Implementations/WSF_DECAT_B_ExploringIDCerrors.ipynb,notebooks/Implementations/URB_SEAU1_NovelUrbanization/MAP_Urbanization.ipynb' ignore-regex = '^\s*"image\/png":\s.*' -ignore-words-list = "gost," +ignore-words-list = "gost,GOST,ALOS,ans,NAM" [tool.ruff.lint.pydocstyle] convention = "numpy" + +[tool.ruff] +exclude = [ + "WSF_DECAT_B_ExploringIDCerrors.ipynb", + "KAZ_Urbanization_Review.ipynb", + "Urban_metrics_Fullness.ipynb", + "Urban_metrics_Shape.ipynb", + "Urban_metrics_Sprawl.ipynb", + "Urban_metrics_Structure.ipynb", + "URB_DECAT_B_ExploringGHSSMODcode.ipynb", + "URB_SCAUR_UKR_B_I_urbanizationReview.ipynb", + "URB_SEAU1_B_A_Ka_ExtractDataUrban.ipynb", + "Zonal_statistics.ipynb", + "Data_Preparation.ipynb", + "URB_SEAU1_B_A_Ka_NovelUrbanizaton.ipynb", + "Create_Mosaick_Datasets.ipynb", + "GHSL_Standardize_To_Country.ipynb", + "MAP_Urbanization.ipynb", + "NGA_specific_results.ipynb", +] diff --git a/src/GOSTurban/LEI.py b/src/GOSTurban/LEI.py index b51d10d..58e6588 100755 --- a/src/GOSTurban/LEI.py +++ b/src/GOSTurban/LEI.py @@ -14,7 +14,30 @@ def mp_lei( curRxx, transformxx, idx_xx, old_list=[4, 5, 6], new_list=[3], buffer_dist=300 ): - """calculate and summarize LEI for curRxx, designed for use in multiprocessing function""" + """ + Calculate and summarize LEI for curRxx, designed for use in a multiprocessing function. + + Parameters + ---------- + curRxx : numpy array + raster data + transformxx : rasterio.transform + rasterio transformation object + idx_xx : int + index of the raster + old_list : list of ints, optional + Values in built area raster to consider old urban. The default is [4, 5, 6]. 
+ new_list : list of ints, optional + Values in built area raster to consider new urban. The default is [3]. + buffer_dist : int, optional + distance to buffer new urban areas for comparison to old urban extents. The default is 300. + + Returns + ------- + pandas.dataframe + summary of LEI results + + """ curRes = calculate_LEI( curRxx, transform=transformxx, @@ -40,14 +63,35 @@ def lei_from_feature( measure_crs=None, idx_col=None, ): - """Calculate LEI for each feature in inD, leveraging multi-processing, based on the built area in inR - - INPUT - inD [geopandas] - inR [rasterio] - [optional] nCores [int] - number of cores to use in multi-processing - [optional] measure_crs [string] - string to convert all data to a CRS where distance and area measurements don't suck ie - "ESRI:54009" - ... see calculate_LEI for remaining arguments + """ + Calculate LEI for each feature in inD, leveraging multi-processing, based on the built area in inR + + Parameters + ---------- + inD : geopandas dataframe + input data + inR : rasterio object + raster data + old_list : list of ints, optional + Values in built area raster to consider old urban. The default is [4, 5, 6]. + new_list : list of ints, optional + Values in built area raster to consider new urban. The default is [3]. + buffer_dist : int, optional + distance to buffer new urban areas for comparison to old urban extents. The default is 300. + transform : rasterio.transform, optional + rasterio transformation object. The default is "". 
+ nCores : int, optional + number of cores to use in multi-processing + measure_crs : string, optional + string to convert all data to a CRS where distance and area measurements don't suck ie - "ESRI:54009" + idx_col : string, optional + column name to use as index + + Returns + ------- + pandas.dataframe + summary of LEI results + """ if inD.crs != inR.crs: inD = inD.to_crs(inR.crs) @@ -55,7 +99,7 @@ def lei_from_feature( if measure_crs is not None: measureD = inD.to_crs(measure_crs) - lei_results = {} + # lei_results = {} # For grid cells, extract the GHSL and calculate in_vals = [] tPrint("***** Preparing values for multiprocessing") @@ -95,30 +139,39 @@ def lei_from_feature( def calculate_LEI(inputGHSL, old_list, new_list, buffer_dist=300, transform=""): - """Calculate Landscape Expansion Index (LEI) through comparison of categorical values in a single raster dataset. + """ + Calculate Landscape Expansion Index (LEI) through comparison of categorical values in a single raster dataset. - :param inputGHSL: Path to a geotiff or a rasterio object, or a numpy array containing the categorical + Parameters + ---------- + inputGHSL : str or rasterio.DatasetReader or numpy array + Path to a geotiff or a rasterio object, or a numpy array containing the categorical data used to calculate LEI - :type inputGHSL: rasterio.DatasetReader - :param old_list: Values in built area raster to consider old urban. - :type old_list: list of ints - :param new_list: Values in built area raster to consider new urban - :type new_list: list of ints - :param buffer_dist: distance to buffer new urban areas for comparison to old urban extents, defaults to 300 (m) - :type buffer_dist: int, optional - :param transform: rasterio transformation object. Required if inputGHSL is a numpy array, defaults to '' - :type transform: str, optional - :returns: individual vectors of new built areas with LEI results. Each item is a single new built feature with three columns: - 1. 
geometry of the new built area feature - 2. number of pixels in new built area donut from old built area - 3. area of new built area buffer - - :example: - # This calculates the LEI between 1990 and 2000 in the categorical GHSL - lei_raw = calculate_LEI(input_ghsl, old_list = [5,6], new_list=[4]) - lei_90_00 = pd.DataFrame(lei_raw, columns=['geometry', 'old', 'total']) - lei_90_00['LEI'] = lei_90_00['old'] / lei_90_00['total'] - lei_90_00.head() + old_list : list of ints + Values in built area raster to consider old urban. + new_list : list of ints + Values in built area raster to consider new urban + buffer_dist : int, optional + distance to buffer new urban areas for comparison to old urban extents, defaults to 300 (m) + transform : rasterio transformation object or string, optional + Required if inputGHSL is a numpy array, defaults to '' + + Returns + ------- + list + individual vectors of new built areas with LEI results. Each item is a single new built feature with three columns: + 1. geometry of the new built area feature + 2. number of pixels in new built area donut from old built area + 3. 
area of new built area buffer + + Examples + -------- + >>> # This calculates the LEI between 1990 and 2000 in the categorical GHSL + >>> lei_raw = calculate_LEI(input_ghsl, old_list = [5,6], new_list=[4]) + >>> lei_90_00 = pd.DataFrame(lei_raw, columns=['geometry', 'old', 'total']) + >>> lei_90_00['LEI'] = lei_90_00['old'] / lei_90_00['total'] + >>> lei_90_00.head() + """ if isinstance(inputGHSL, str): inRaster = rasterio.open(inputGHSL) @@ -143,7 +196,7 @@ def calculate_LEI(inputGHSL, old_list, new_list, buffer_dist=300, transform=""): # Clip out the original shape to leave just the donut try: donutArea = bufferArea.difference(curShape) - except: + except Exception: bufferArea = bufferArea.buffer(0) curShape = curShape.buffer(0) donutArea = bufferArea.difference(curShape) @@ -165,16 +218,23 @@ def calculate_LEI(inputGHSL, old_list, new_list, buffer_dist=300, transform=""): def summarize_LEI(in_file, leap_val=0.05, exp_val=0.9): - """Summarize the LEI results produced by self.calculate_LEI - - :param in_file: LEI results generated from calculate_LEI above - :type in_file: string path to csv file or pandas dataframe - :param leap_val: LEI value below which areas are considered to be leapfrog, defaults to 0.05 - :type leap_val: float, optional - :param exp_val: LEI value above which areas are considered to be infill, defaults to 0.9 - :type exp_val: float, optional - :returns: pandas groupby row summarizing area in m2 of leapfrog, expansion, and infill areas - :rtype: pandas groupby row + """ + Summarize the LEI results produced by self.calculate_LEI + + Parameters + ---------- + in_file : string path to csv file or pandas dataframe + LEI results generated from calculate_LEI above + leap_val : float, optional + LEI value below which areas are considered to be leapfrog, defaults to 0.05 + exp_val : float, optional + LEI value above which areas are considered to be infill, defaults to 0.9 + + Returns + ------- + pandas groupby row + pandas groupby row summarizing area 
in m2 of leapfrog, expansion, and infill areas + """ if isinstance(in_file, str): res = pd.read_csv(in_file) @@ -185,6 +245,24 @@ def summarize_LEI(in_file, leap_val=0.05, exp_val=0.9): res["area"] = res["geometry"].apply(lambda x: x.area) def calculate_LEI(val, leap_val, exp_val): + """ + Calculate the LEI class based on the input value + + Parameters + ---------- + val : float + LEI value + leap_val : float + LEI value below which areas are considered to be leapfrog + exp_val : float + LEI value above which areas are considered to be infill + + Returns + ------- + string + LEI class + + """ if val <= leap_val: return "Leapfrog" elif val < exp_val: diff --git a/src/GOSTurban/UrbanRaster.py b/src/GOSTurban/UrbanRaster.py index 9a05029..b792afc 100644 --- a/src/GOSTurban/UrbanRaster.py +++ b/src/GOSTurban/UrbanRaster.py @@ -23,17 +23,37 @@ from shapely.geometry import shape, Polygon from geopy.geocoders import Nominatim -"""prints the time along with the message""" - def tPrint(s): + """ + Print the time along with the message. + + Parameters + ---------- + s : string + message to print + + Returns + ------- + None. + + """ print("%s\t%s" % (time.strftime("%H:%M:%S"), s)) def geocode_cities(urban_extents): """Generate names for polygon urban extents - :param urban_extents: geopandas dataframe of polygons to be named. Need to be in epsg:4326 + Parameters + ---------- + urban_extents : gpd.GeoDataFrame + geopandas dataframe of polygons to be named. 
Need to be in epsg:4326 + + Returns + ------- + gpd.GeoDataFrame + input geopandas dataframe with added columns for city, state, and country + """ geolocator = Nominatim(user_agent="new_app") all_res = [] @@ -53,15 +73,15 @@ def geocode_cities(urban_extents): res = all_res[idx] try: urban_extents.loc[idx, "City"] = res.raw["address"]["city"] - except: + except Exception: break try: urban_extents.loc[idx, "State"] = res.raw["address"]["state"] - except: + except Exception: pass try: urban_extents.loc[idx, "Country"] = res.raw["address"]["country"] - except: + except Exception: pass return urban_extents @@ -71,9 +91,17 @@ def __init__(self, inRaster): """ Create urban definitions using gridded population data. - :param inRaster: string or rasterio object representing gridded population data + Parameters + ---------- + inRaster : string or rasterio object + string or object representing gridded population data + + Returns + ------- + None. + """ - if type(inRaster) == str: + if isinstance(inRaster, str): self.inR = rasterio.open(inRaster) elif isinstance(inRaster, rasterio.DatasetReader): self.inR = inRaster @@ -106,12 +134,31 @@ def calculateDegurba( (12) Rural, dispersed, low density - dens: >50, (11) Rural, dispersed, low density - the rest that are populated - :param urbDens: integer of the minimum density value to be counted as urban - :param hdDens: integer of the minimum density value to be counted as high density - :param urbThresh: integer minimum total settlement population to be considered urban - :param hdThresh: integer minimum total settlement population to be considered high density - """ + Parameters + ---------- + urbDens : integer + minimum density value to be counted as urban + hdDens : integer + minimum density value to be counted as high density + urbThresh : integer + minimum total settlement population to be considered urban + hdThresh : integer + minimum total settlement population to be considered high density + minPopThresh : integer + 
minimum population to be considered a settlement + out_raster : string, optional + path to save the output raster. The default is "". + print_message : string, optional + message to print with each step. The default is "". + verbose : boolean, optional + print messages. The default is False. + + Returns + ------- + dictionary + dictionary containing the final raster, the high density raster, the urban raster, and the shapes of the urban areas + """ popRaster = self.inR data = popRaster.read() urban_raster = data * 0 @@ -146,11 +193,11 @@ def modal(P): idx = idx + 1 if value > 0: # RRemove holes from urban shape - origShape = cShape + # origShape = cShape xx = shape(cShape) xx = Polygon(xx.exterior) cShape = xx.__geo_interface__ - # If the shape is urban, claculate total pop + # If the shape is urban, calculate total pop mask = rasterize( [(cShape, 0)], out_shape=data[0, :, :].shape, @@ -192,7 +239,7 @@ def modal(P): tPrint("%s: Creating Shape %s" % (print_message, idx)) idx = idx + 1 if value > 0: - # If the shape is urban, claculate total pop + # If the shape is urban, calculate total pop mask = rasterize( [(cShape, 0)], out_shape=data[0, :, :].shape, @@ -241,7 +288,7 @@ def calc_nearest(x, dist_gpd, dist_idx): dists = xx["geometry"].apply(lambda y: y.distance(x)) try: return min(dists[dists > 0]) - except: + except Exception: return 0 to_be["dist"] = to_be["geometry"].apply( @@ -291,19 +338,34 @@ def calculateUrban( Generate urban extents from gridded population data through the application of a minimum density threshold and a minimum total population threshold - :param densVal: integer of the minimum density value to be counted as urban - :param totalPopThresh: integer minimum total settlement population to ne considered urban - :param smooth: boolean to run a single modal smoothing function (this should be run when running - on WorldPop as the increased resolution often leads to small holes and funny shapes - :param verbose: boolean on what messages to receive 
- :param queen: boolean to determine whether to dissolve final shape to connect queen's contiguity - :param raster: string path to create a boolean raster of urban and not. - Empty string is the default and will create no raster - :param raster_pop: string path to create a raster of the population layer only in the urban areas - Empty string is the default and will create no raster - :returns: GeoPandasDataFrame of the urban extents - """ + Parameters + ---------- + densVal : integer, optional + minimum density value to be counted as urban + totalPopThresh : integer, optional + minimum total settlement population to be considered urban + smooth : boolean, optional + toggle to run a single modal smoothing function (this should be run when running + on WorldPop as the increased resolution often leads to small holes and funny shapes + verbose : boolean + what messages to receive + queen : boolean + determine whether to dissolve final shape to connect queen's contiguity + raster : str + string path to create a boolean raster of urban and not. + Empty string is the default and will create no raster + raster_pop : str + string path to create a raster of the population layer only in the urban areas + Empty string is the default and will create no raster + print_message : str + message to print with each step. The default is "". 
+ + Returns + ------- + gpd.GeoDataFrame + geopandas dataframe of urban extents + """ popRaster = self.inR data = popRaster.read() urbanData = (data > densVal) * 1 @@ -319,7 +381,7 @@ def calculateUrban( if idx % 1000 == 0 and verbose: tPrint("%s: Creating Shape %s" % (print_message, idx)) if value == 1: - # If the shape is urban, claculate total pop + # If the shape is urban, calculate total pop mask = rasterize( [(cShape, 0)], out_shape=data[0, :, :].shape, @@ -330,7 +392,7 @@ def calculateUrban( curPop = np.nansum(inData) if ( curPop < 0 - ): # when smoothed, sometimes the pop withh be < 0 because of no data + ): # when smoothed, sometimes the pop will be < 0 because of no data inData = np.ma.array(data=inData, mask=(inData < 0).astype(bool)) curPop = np.nansum(inData) if curPop > totalPopThresh: @@ -355,14 +417,14 @@ def calculateUrban( urban_raster[0, :, :] = urban_res allFeatures = [] - badFeatures = [] + # badFeatures = [] for cShape, value in features.shapes( urban_raster, transform=popRaster.transform ): if idx % 1000 == 0 and verbose: tPrint("%s: Creating Shape %s" % (print_message, idx)) if value == 1: - # If the shape is urban, claculate total pop + # If the shape is urban, calculate total pop mask = rasterize( [(cShape, 0)], out_shape=data[0, :, :].shape, @@ -373,7 +435,7 @@ def calculateUrban( curPop = np.nansum(inData) if ( curPop < 0 - ): # when smoothed, sometimes the pop withh be < 0 because of no data + ): # when smoothed, sometimes the pop will be < 0 because of no data inData = np.ma.array(data=inData, mask=(inData < 0).astype(bool)) curPop = np.nansum(inData) if curPop > totalPopThresh: diff --git a/src/GOSTurban/country_helper.py b/src/GOSTurban/country_helper.py index aef766a..1806aca 100755 --- a/src/GOSTurban/country_helper.py +++ b/src/GOSTurban/country_helper.py @@ -16,16 +16,27 @@ class urban_country: - """helper function to centralize urban calculations for a single country""" + """Helper function to centralize urban calculations for 
a single country""" def __init__(self, iso3, sel_country, cur_folder, inP): - """calculate urban extents for selected country and population raster + """ + Calculate urban extents for selected country and population raster. + + Parameters + ---------- + iso3 : string + ISO 3 of selected country + sel_country : geopandas dataframe + selected country bounds + cur_folder : string path + path to output folder + inP : rasterio read + opened population raster dataset + + Returns + ------- + None - INPUT - iso3 [string] - ISO 3 of selected country - sel_country [geopandas dataframe] - selected country bounds - cur_folder [string path] - path to output folder - inP [rasterio read] - opened population raster dataset """ self.iso3 = iso3 self.sel_country = sel_country @@ -52,12 +63,26 @@ def __init__(self, iso3, sel_country, cur_folder, inP): self.urban_ghsl = os.path.join(cur_folder, f"{iso3}_urban_ghsl.csv") self.urban_hd_ghsl = os.path.join(cur_folder, f"{iso3}_urban_hd_ghsl.csv") - if type(inP) == str: + if isinstance(inP, str): inP = rasterio.open(inP) self.inP = inP def calculate_urban_extents(self, calculate_area=True, area_crs=3857): - """Run EC urban extent analysis""" + """ + Run EC urban extent analysis + + Parameters + ---------- + calculate_area : boolean + if True, calculate area of urban extents + area_crs : int + EPSG code for area calculation + + Returns + ------- + None + + """ urban_calculator = urban.urbanGriddedPop(self.inP) if not os.path.exists(self.urban_extents_file): tPrint(f"Running urbanization for {self.iso3}") @@ -82,7 +107,7 @@ def calculate_urban_extents(self, calculate_area=True, area_crs=3857): urban_extents = urban_extents.to_crs(4326) try: urban_extents = urban.geocode_cities(urban_extents) - except: + except Exception: pass urban_extents.to_file(self.urban_extents_file, driver="GeoJSON") @@ -112,7 +137,7 @@ def calculate_urban_extents(self, calculate_area=True, area_crs=3857): urban_extents_hd = urban_extents_hd.to_crs(4326) try: 
urban_extents_hd = urban.geocode_cities(urban_extents_hd) - except: + except Exception: pass urban_extents_hd.to_file(self.urban_extents_hd_file, driver="GeoJSON") self.urban_extents_hd = urban_extents_hd @@ -120,7 +145,19 @@ def calculate_urban_extents(self, calculate_area=True, area_crs=3857): self.urban_extents_hd = gpd.read_file(self.urban_extents_hd_file) def summarize_ntl(self, ntl_files=[]): - """run zonal analysis on nighttime lights using urban extents""" + """ + Run zonal analysis on nighttime lights using urban extents + + Parameters + ---------- + ntl_files : list of paths + path to individual nighttime lights raster files + + Returns + ------- + None + + """ if (not os.path.exists(self.urban_ntl)) or ( not os.path.exists(self.urban_hd_ntl) ): @@ -143,7 +180,7 @@ def summarize_ntl(self, ntl_files=[]): try: urbanD = self.urban_extents urbanHD = self.urban_extents_hd - except: + except Exception: self.calculate_urban_extents() urbanD = self.urban_extents urbanHD = self.urban_extents_hd @@ -165,8 +202,8 @@ def summarize_ntl(self, ntl_files=[]): ] hd_urban_df = pd.DataFrame(hd_urban_res, columns=col_names) hd_urban_df.to_csv(urban_hd_res_file) - except: - tPrint(f"***********ERROR with {iso3} and {name}") + except Exception: + tPrint(f"***********ERROR with {name}") # Compile VIIRS results urb_files = [x for x in os.listdir(viirs_folder) if x.startswith("URBAN")] @@ -187,13 +224,24 @@ def summarize_ntl(self, ntl_files=[]): def summarize_ghsl( self, ghsl_files, binary_calc=False, binary_thresh=1000, clip_raster=False ): - """Summarize GHSL data + """ + Summarize GHSL data + + Parameters + ---------- + ghsl_files : list of paths + path to individual built area raster files + binary_calc : binary, optional + if True, additionally calculate zonal stats on a binary built raster, default=False + binary_thresh : int, optional + if binary_calc is True, all cells above threshold will be considered built, default=1000 + clip_raster : binary, optional + if True, clip 
the GHSL datasets for the calculations, default=False + + Returns + ------- + None - INPUT - ghsl_files [list of paths] - path to individual built area raster files - [optional] binary_calc [binary, default=False] - if True, additionally calculate zonal stats on a binary built raster - [optional] binary_thresh [int, default=1000] - if binary_calc is True, all cells above threshold will be considered built - [optional] clip_raster [binary, default=False] - if True, clip the GHSL datasets for the calculations """ if (not os.path.exists(self.urban_ghsl)) or ( not os.path.exists(self.urban_hd_ghsl) @@ -201,7 +249,7 @@ def summarize_ghsl( try: urbanD = self.urban_extents urbanHD = self.urban_extents_hd - except: + except Exception: self.calculate_urban_extents() urbanD = self.urban_extents urbanHD = self.urban_extents_hd @@ -238,7 +286,7 @@ def summarize_ghsl( localR = rasterio.open(local_file) inD = localR.read() inD[inD == localR.meta["nodata"]] = 0 - except: + except Exception: raise ( ValueError( "In order to calculate binary zonal, you need to clip out local ghsl data" @@ -263,7 +311,18 @@ def summarize_ghsl( pd.DataFrame(urbanHD.drop(["geometry"], axis=1)).to_csv(self.urban_hd_ghsl) def delete_urban_data(self): - """delete urban extents""" + """ + Delete urban extents. 
+ + Parameters + ---------- + None + + Returns + ------- + None + + """ for cFile in [ self.urban_extents_file, self.urban_extents_raster_file, @@ -272,5 +331,5 @@ def delete_urban_data(self): ]: try: os.remove(cFile) - except: + except Exception: pass diff --git a/src/GOSTurban/urban_helper.py b/src/GOSTurban/urban_helper.py index 8e4db86..488378c 100755 --- a/src/GOSTurban/urban_helper.py +++ b/src/GOSTurban/urban_helper.py @@ -9,22 +9,37 @@ import pandas as pd import numpy as np -import GOSTurban.UrbanRaster as urban import GOSTRocks.rasterMisc as rMisc from GOSTRocks.misc import tPrint class summarize_population(object): - """summarize population and urban populations for defined admin regions""" + """Summarize population and urban populations for defined admin regions""" def __init__( self, pop_layer, admin_layer, urban_layer="", hd_urban_layer="", temp_folder="" ): - """Summarize population into urban and rural based on GOST_Urban.UrbanRaster.calculateUrban + """ + Summarize population into urban and rural based on GOST_Urban.UrbanRaster.calculateUrban + + Parameters + ---------- + pop_layer : string + Path to population layer + admin_layer : string + Path to admin layer + urban_layer : string, optional + Path to urban layer. The default is ''. + hd_urban_layer : string, optional + Path to high density urban layer. The default is ''. + temp_folder : string, optional + Path to temporary folder. The default is ''. + + Returns + ------- + None. - INPUT - pop_layer [string] - path """ self.pop_layer = pop_layer @@ -48,7 +63,19 @@ def __init__( self.temp_folder = temp_folder def check_inputs(self): - """Ensure all layers exist""" + """ + Ensure all layers exist + + Parameters + ---------- + None + + Returns + ------- + bool + True if all layers exist, False if any do not exist. 
+ + """ check_vals = {} good = True for lyr in [self.pop_layer, self.urban_layer, self.urban_hd_layer]: @@ -59,13 +86,22 @@ def check_inputs(self): return good def calculate_zonal(self, out_name="", convert_urban_binary=False): - """Run zonal statistics on input admin layers, population layers, and urban layers - - Args: - out_name (str, optional): name to append to output populations columns. Defaults to ''. - convert_urban_binary (bool, optional): option to convert urban layer to binary. Anything > 0 becomes binary 1 for urban. Defaults to False. """ + Run zonal statistics on input admin layers, population layers, and urban layers + + Parameters + ---------- + out_name : str, optional + name to append to output populations columns. Defaults to ''. + convert_urban_binary : bool, optional + option to convert urban layer to binary. Anything > 0 becomes binary 1 for urban. Defaults to False. + + Returns + ------- + pd.DataFrame + Dataframe with zonal statistics for population and urban layers + """ inP = self.in_pop.read() inA = self.admin_layer # gpd.read_file(self.admin_layer) @@ -101,7 +137,7 @@ def calculate_zonal(self, out_name="", convert_urban_binary=False): ) try: final = final.join(res) - except: + except Exception: final = res return final @@ -120,11 +156,24 @@ def __init__( ): """Create object for managing input data for summarizing urban extents - INPUT - :param: iso3 - string describing iso3 code - :param: output_folder - string path to folder to hold results - :param: country_bounds - geopandas dataframe of admin0 boundary - + Parameters + ---------- + iso3 : str + string describing iso3 code + output_folder : str + string path to folder to hold results + country_bounds : gpd.GeoDataFrame + geopandas dataframe of admin0 boundary + pop_files : list + list of population files to summarize + final_folder : str, optional + string path to folder to hold final results. The default is ''. 
+ ghspop_suffix : str, optional + string suffix to append to GHSPOP files. The default is ''. + + Returns + ------- + None. NAMING CONVENTION To save this renaming step on my side, which can also induce mistakes, would be possible for you Ben to rename the files in your code directly? This would be also helpful for all other countries we have to do, and for the 1km*1km rasters. @@ -134,6 +183,7 @@ def __init__( tza_upo15 and tza_upo18 for WorldPop population unconstrained tza_cpo15 and tza_cpo18 for WorldPop population constrained. Then for 1km*1km raster, names are the same except that the three lettres of the country's name are followed by 1k, ie tza1k_slo, tza1k_ele and so on. + """ self.iso3 = iso3 self.out_folder = output_folder @@ -186,7 +236,19 @@ def __init__( self.inD.to_file(self.admin_shp) def process_dem(self, global_dem=""): - """Download DEM from AWS, calculate slope""" + """ + Download DEM from AWS, calculate slope. + + Parameters + ---------- + global_dem : str, optional + string path to global dem file. The default is ''. + + Returns + ------- + None. + + """ # Download DEM if not os.path.exists(self.dem_file) and global_dem == "": @@ -223,7 +285,29 @@ def extract_layers( global_ghsl, global_smod, ): - """extract global layers for current country""" + """ + Extract global layers for current country. + + Parameters + ---------- + global_landcover : string + string path to global landcover file + global_ghspop : string + string path to global GHSPOP file + global_ghspop1k : string + string path to global GHSPOP1k file + global_ghbuilt : string + string path to global GHSBuilt file + global_ghsl : string + string path to global GHSL file + global_smod : string + string path to global SMOD file + + Returns + ------- + None. 
+ + """ # Extract desert from globcover if not os.path.exists(self.desert_file): tPrint("Extracting desert") @@ -263,7 +347,7 @@ def extract_layers( if inR.crs.to_epsg() != self.inD.crs.to_epsg(): tempD = self.inD.to_crs(inR.crs) else: - tempD = inD + tempD = self.inD ul = inR.index(*tempD.total_bounds[0:2]) lr = inR.index(*tempD.total_bounds[2:4]) # read the subset of the data into a numpy array @@ -316,7 +400,7 @@ def extract_layers( if not os.path.exists(self.admin_file): tPrint("Rasterizing admin boundaries") xx = rasterio.open(self.ghspop_file) - res = xx.meta["transform"][0] + # res = xx.meta["transform"][0] tempD = self.inD.to_crs(xx.crs) shapes = ((row["geometry"], 1) for idx, row in tempD.iterrows()) burned = features.rasterize( @@ -332,9 +416,23 @@ def extract_layers( outR.write_band(1, burned) def calculate_urban(self, urb_val=300, hd_urb_val=1500): - """Calculate urban and HD urban extents from population files""" + """ + Calculate urban and HD urban extents from population files + + Parameters + ---------- + urb_val : int, optional + threshold value for urban extent. The default is 300. + hd_urb_val : int, optional + threshold value for high density urban extent. The default is 1500. + + Returns + ------- + None. 
+ + """ # Calculate urban extents from population layers - ghs_R = rasterio.open(self.ghspop_file) + # ghs_R = rasterio.open(self.ghspop_file) for p_file in self.pop_files: final_pop = os.path.join( self.final_folder, @@ -345,31 +443,21 @@ def calculate_urban(self, urb_val=300, hd_urb_val=1500): print(final_pop) if "1k1k" in final_pop: final_pop = final_pop.replace("1k1k", "1k") - final_urban = final_pop.replace(".tif", "_urban.tif") - final_urban_hd = final_pop.replace(".tif", "_urban_hd.tif") - urbanR = urban.urbanGriddedPop(final_pop) - # Convert density values for urbanization from 1km resolution to current resolution - in_raster = rasterio.open(final_pop) - total_ratio = (in_raster.res[0] * in_raster.res[1]) / 1000000 - if not os.path.exists(final_urban): - urban_shp = urbanR.calculateUrban( - densVal=(urb_val * total_ratio), - totalPopThresh=5000, - raster=final_urban, - ) - if not os.path.exists(final_urban_hd): - cluster_shp = urbanR.calculateUrban( - densVal=(hd_urb_val * total_ratio), - totalPopThresh=50000, - raster=final_urban_hd, - smooth=True, - queen=True, - ) def pop_zonal_admin(self, admin_layer): - """calculate urban and rural + """ + Calculate urban and rural + + Parameters + ---------- + admin_layer : gpd.GeoDataFrame + geopandas dataframe of admin layer + + Returns + ------- + pd.DataFrame + dataframe of zonal statistics for population and urban layers - :param: - admin_layer """ for p_file in self.pop_files: pop_file = os.path.join( @@ -383,11 +471,12 @@ def pop_zonal_admin(self, admin_layer): yy = summarize_population(pop_file, admin_layer) if yy.check_inputs(): res = yy.calculate_zonal(out_name="") - out_file = f"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/LSO_{os.path.basename(p_file)}.csv" - try: - final = final.join(res) - except: - final = res + # out_file = f"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/LSO_{os.path.basename(p_file)}.csv" + final = res + # try: + 
# final = final.join(res) + # except Exception: + # final = res else: print("Error summarizing population for %s" % pop_file) admin_layer = admin_layer.reset_index() @@ -397,7 +486,20 @@ def pop_zonal_admin(self, admin_layer): return final def compare_pop_rasters(self, verbose=True): - """read in and summarize population rasters""" + """ + Read in and summarize population rasters. + + Parameters + ---------- + verbose : bool, optional + print results to console. The default is True. + + Returns + ------- + list + list of population rasters and their summed values + + """ all_res = [] for pFile in self.pop_files: inR = rasterio.open(pFile) @@ -409,7 +511,19 @@ def compare_pop_rasters(self, verbose=True): return all_res def standardize_rasters(self, include_ghsl_h20=True): - """ """ + """ + Standardize rasters to GHSPOP resolution and extent + + Parameters + ---------- + include_ghsl_h20 : bool, optional + include GHSL water layer. The default is True. + + Returns + ------- + None. + + """ ghs_R = rasterio.open(self.ghspop_file) pFile = self.ghspop_file if self.suffix == "1k": @@ -446,7 +560,7 @@ def standardize_rasters(self, include_ghsl_h20=True): for file_def in file_defs: try: out_file = file_def[3] - except: + except Exception: out_file = os.path.join( self.final_folder, os.path.basename(file_def[0]).replace( @@ -550,6 +664,18 @@ def evaluateOutput(self, admin_stats, commune_stats): c. calculate overlap between water and urban https://ghsl.jrc.ec.europa.eu/documents/cfs01/V3/CFS_Ghana.pdf + + Parameters + ---------- + admin_stats : string + path to admin stats file + commune_stats : string + path to commune stats file + + Returns + ------- + None. 
+ """ stats_file = os.path.join( self.out_folder, "DATA_EVALUATION_%s_%s.txt" % (self.iso3, self.suffix) @@ -602,7 +728,7 @@ def evaluateOutput(self, admin_stats, commune_stats): out_stats.write( f"{name}: {((urbPop/tPop) * 100).round(2)}% Urban; {((hdPop/tPop) * 100).round(2)}% HD Urban\n" ) - except: + except Exception: print(f"Error processing {name}") print(fileDef) @@ -705,5 +831,5 @@ def evaluateOutput(self, admin_stats, commune_stats): try: curD_sum = curD.loc[curD > 0].sum() out_stats.write(f"{col}: {round(curD_sum)}\n") - except: + except Exception: pass