diff --git a/README.md b/README.md
index 77d1b26..869a2a7 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,8 @@ We chose to use a deep learning convolutional neural network (CNN) to identify a
These notebooks and scripts are created in python through the VS Code platform.
Before running any scripts or notebooks, the user should import the necessary packages listed below, should the necessary packages not be available you can try a 'pip install **package name**'
+## Installation
+
Relevant packages and libraries to install include :
```python
@@ -30,6 +32,8 @@ The **goes2go** package is developed as an easy and efficient way to access GOES
[Brian's GitHub](https://github.com/blaylockbk/goes2go)
+[AWS CLI Download Instructions](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html)
+
The next package that may be unfamiliar is subprocess. We use the subprocess package to access the GOES data stored on the AWS server.
To set-up the AWS CLI on your own please follow the links under **GOES on AWS**:
diff --git a/notebooks/Data_Cleaning.ipynb b/notebooks/Data_Cleaning.ipynb
index 06d4690..58b5261 100644
--- a/notebooks/Data_Cleaning.ipynb
+++ b/notebooks/Data_Cleaning.ipynb
@@ -150,13 +150,13 @@
"metadata": {},
"outputs": [],
"source": [
- "def CloudImageryProcessingFunction(file):\n",
+ "import numpy as np\n",
+ "import xarray as xr\n",
+ "from netCDF4 import Dataset\n",
"\n",
- " # for file in nc_file_list:\n",
+ "def CloudImageryProcessingFunction(file):\n",
" print(file)\n",
- "\n",
" ds = xr.open_dataset(file)\n",
- "\n",
" file_id = Dataset(file)\n",
"\n",
" # Call function to calculate latitude and longitude from GOES ABI fixed grid projection data\n",
@@ -167,6 +167,20 @@
" G = ds['CMI_C03'].data\n",
" B = ds['CMI_C01'].data\n",
"\n",
+ " # Clip outliers to the 1.5*IQR fences; quartiles skip NaN so NaN inputs do not make both bounds (and the whole result) NaN\n",
+ " def remove_outliers(data):\n",
+ " Q1, Q3 = np.nanpercentile(data, [25, 75])\n",
+ " IQR = Q3 - Q1\n",
+ " lower_bound = Q1 - 1.5 * IQR\n",
+ " upper_bound = Q3 + 1.5 * IQR\n",
+ " data = np.clip(data, lower_bound, upper_bound)\n",
+ " return data\n",
+ "\n",
+ " # Remove outliers from each channel\n",
+ " R = remove_outliers(R)\n",
+ " G = remove_outliers(G)\n",
+ " B = remove_outliers(B)\n",
+ "\n",
" # Apply range limits for each channel. RGB values must be between 0 and 1\n",
" R = np.clip(R, 0, 1)\n",
" G = np.clip(G, 0, 1)\n",
@@ -183,14 +197,10 @@
" G_true = np.maximum(G_true, 0)\n",
" G_true = np.minimum(G_true, 1)\n",
"\n",
- "\n",
" # The RGB array for the true color image\n",
" RGB = np.dstack([R, G_true, B])\n",
" rgb = RGB[:,:-1,:] # reverse the green???\n",
"\n",
- " # Assuming abi_lat and abi_lon have shapes (M, N), and R/G/B are (M, N)\n",
- " # rgb = np.dstack([R, G, B]) # Stack the individual R, G, B components CHECKING THIS\n",
- "\n",
" return abi_lat, abi_lon, rgb\n"
]
},
@@ -234,21 +244,34 @@
"metadata": {},
"outputs": [],
"source": [
- "# data processing\n",
- "def CloudOpticalDepthProcessingFunction(file):\n",
+ "import numpy as np\n",
+ "import xarray as xr\n",
+ "from netCDF4 import Dataset\n",
"\n",
- " # for file in nc_file_list:\n",
+ "# Data processing\n",
+ "def CloudOpticalDepthProcessingFunction(file):\n",
" print(file)\n",
- "\n",
" ds = xr.open_dataset(file)\n",
- "\n",
" file_id = Dataset(file)\n",
"\n",
" # Call function to calculate latitude and longitude from GOES ABI fixed grid projection data\n",
" abi_lat, abi_lon = calculate_degrees(file_id)\n",
"\n",
+ " # Load Cloud Optical Depth (COD) data\n",
" cod = ds['COD'].data\n",
"\n",
+ " # Clip outliers to the 1.5*IQR fences; quartiles skip NaN so NaN inputs do not make both bounds (and the whole result) NaN\n",
+ " def remove_outliers(data):\n",
+ " Q1, Q3 = np.nanpercentile(data, [25, 75])\n",
+ " IQR = Q3 - Q1\n",
+ " lower_bound = Q1 - 1.5 * IQR\n",
+ " upper_bound = Q3 + 1.5 * IQR\n",
+ " data = np.clip(data, lower_bound, upper_bound)\n",
+ " return data\n",
+ "\n",
+ " # Remove outliers from the COD data\n",
+ " cod = remove_outliers(cod)\n",
+ "\n",
" return abi_lat, abi_lon, cod\n"
]
},
@@ -292,20 +315,34 @@
"metadata": {},
"outputs": [],
"source": [
- "def ReflectedSWProcessingFunction(file):\n",
+ "import numpy as np\n",
+ "import xarray as xr\n",
+ "from netCDF4 import Dataset\n",
"\n",
- " # for file in nc_file_list:\n",
+ "def ReflectedSWProcessingFunction(file):\n",
" print(file)\n",
- "\n",
" ds = xr.open_dataset(file)\n",
- "\n",
" file_id = Dataset(file)\n",
"\n",
+ " # Extract latitude and longitude\n",
" abi_lat = ds['lat'].values\n",
" abi_lon = ds['lon'].values\n",
"\n",
+ " # Load Reflected Shortwave Radiation (RSR) data\n",
" rsr = ds['RSR'].data\n",
"\n",
+ " # Clip outliers to the 1.5*IQR fences; quartiles skip NaN so NaN inputs do not make both bounds (and the whole result) NaN\n",
+ " def remove_outliers(data):\n",
+ " Q1, Q3 = np.nanpercentile(data, [25, 75])\n",
+ " IQR = Q3 - Q1\n",
+ " lower_bound = Q1 - 1.5 * IQR\n",
+ " upper_bound = Q3 + 1.5 * IQR\n",
+ " data = np.clip(data, lower_bound, upper_bound)\n",
+ " return data\n",
+ "\n",
+ " # Remove outliers from the RSR data\n",
+ " rsr = remove_outliers(rsr)\n",
+ "\n",
" return abi_lat, abi_lon, rsr\n"
]
},
@@ -325,7 +362,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "C:\\Users\\adhal\\AppData\\Local\\Temp\\ipykernel_37992\\3872720453.py:20: RuntimeWarning: invalid value encountered in sqrt\n",
+ "C:\\Users\\adhal\\AppData\\Local\\Temp\\ipykernel_29456\\3872720453.py:20: RuntimeWarning: invalid value encountered in sqrt\n",
" r_s = (-1.0*b_var - np.sqrt((b_var**2)-(4.0*a_var*c_var)))/(2.0*a_var)\n"
]
},
diff --git a/notebooks/Prepare_AI_Ready_Data.ipynb b/notebooks/Prepare_AI_Ready_Data.ipynb
index 9cef3e5..3824c0e 100644
--- a/notebooks/Prepare_AI_Ready_Data.ipynb
+++ b/notebooks/Prepare_AI_Ready_Data.ipynb
@@ -129,7 +129,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -181,7 +181,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -571,7 +571,7 @@
" time_units: Julian days\n",
" lat_units: degrees_north\n",
" lon_units: degrees_east\n",
- " rgb_units: RGB intensity [0-255]
- time: 10
- latitude: 1500
- longitude: 2499
- rgb: 3
time
(time)
int32
91 92 93 94 95 96 97 98 99 100
array([ 91, 92, 93, 94, 95, 96, 97, 98, 99, 100])
lat
(time, latitude, longitude)
float32
57.3 57.3 57.3 ... 14.64 14.64
array([[[57.29578 , 57.29578 , 57.29578 , ..., 51.35827 ,\n",
+ " rgb_units: RGB intensity [0-255]
- time: 10
- latitude: 1500
- longitude: 2499
- rgb: 3
time
(time)
int32
91 92 93 94 95 96 97 98 99 100
array([ 91, 92, 93, 94, 95, 96, 97, 98, 99, 100])
lat
(time, latitude, longitude)
float32
57.3 57.3 57.3 ... 14.64 14.64
array([[[57.29578 , 57.29578 , 57.29578 , ..., 51.35827 ,\n",
" 51.360435 , 51.362617 ],\n",
" [57.29578 , 57.29578 , 57.29578 , ..., 51.32018 ,\n",
" 51.322495 , 51.32386 ],\n",
@@ -611,7 +611,7 @@
" [15.140513 , 15.13982 , 15.139026 , ..., 14.657012 ,\n",
" 14.657232 , 14.657422 ],\n",
" [15.120589 , 15.119889 , 15.119103 , ..., 14.637847 ,\n",
- " 14.638046 , 14.638264 ]]], dtype=float32)lon
(time, latitude, longitude)
float32
0.2618 0.2618 ... -61.95 -61.93
array([[[ 0.26179945, 0.26179945, 0.26179945, ...,\n",
+ " 14.638046 , 14.638264 ]]], dtype=float32)
lon
(time, latitude, longitude)
float32
0.2618 0.2618 ... -61.95 -61.93
array([[[ 0.26179945, 0.26179945, 0.26179945, ...,\n",
" -53.053078 , -53.017624 , -52.982132 ],\n",
" [ 0.26179945, 0.26179945, 0.26179945, ...,\n",
" -53.074497 , -53.03895 , -53.00418 ],\n",
@@ -651,7 +651,7 @@
" [-113.080055 , -113.052925 , -113.02549 , ...,\n",
" -61.966927 , -61.947376 , -61.927856 ],\n",
" [-113.07482 , -113.047676 , -113.02028 , ...,\n",
- " -61.968307 , -61.94878 , -61.929237 ]]], dtype=float32)rgb
(rgb)
<U5
'red' 'green' 'blue'
- units :
- RGB intensity [0-255]
array(['red', 'green', 'blue'], dtype='<U5')
CloudImagery
(time, latitude, longitude, rgb)
float32
nan nan nan nan ... 0.0 0.0 0.0 0.0
array([[[[ nan, nan, nan],\n",
+ " -61.968307 , -61.94878 , -61.929237 ]]], dtype=float32)
rgb
(rgb)
<U5
'red' 'green' 'blue'
- units :
- RGB intensity [0-255]
array(['red', 'green', 'blue'], dtype='<U5')
CloudImagery
(time, latitude, longitude, rgb)
float32
nan nan nan nan ... 0.0 0.0 0.0 0.0
array([[[[ nan, nan, nan],\n",
" [ nan, nan, nan],\n",
" [ nan, nan, nan],\n",
" ...,\n",
@@ -691,7 +691,7 @@
" ...,\n",
" [0. , 0. , 0. ],\n",
" [0. , 0. , 0. ],\n",
- " [0. , 0. , 0. ]]]], dtype=float32)
PandasIndex
PandasIndex(Index([91, 92, 93, 94, 95, 96, 97, 98, 99, 100], dtype='int32', name='time'))
PandasIndex
PandasIndex(Index(['red', 'green', 'blue'], dtype='object', name='rgb'))
- title :
- RGB Cloud Imagery
- description :
- Combined RGB cloud imagery data over time for Julian days 91 to 100.
- source :
- GOES-16 Satellite Cloud and Moisture Imagery
- history :
- Created on 2024-10-24 by combining RGB imagery over time.
- institution :
- University of Washington
- references :
- https://registry.opendata.aws/noaa-goes/
- comment :
- Truncated longitude to match RGB data, latitude and longitude represent cloud imagery grids.
- time_units :
- Julian days
- lat_units :
- degrees_north
- lon_units :
- degrees_east
- rgb_units :
- RGB intensity [0-255]
"
+ " [0. , 0. , 0. ]]]], dtype=float32)
PandasIndex
PandasIndex(Index([91, 92, 93, 94, 95, 96, 97, 98, 99, 100], dtype='int32', name='time'))
PandasIndex
PandasIndex(Index(['red', 'green', 'blue'], dtype='object', name='rgb'))
- title :
- RGB Cloud Imagery
- description :
- Combined RGB cloud imagery data over time for Julian days 91 to 100.
- source :
- GOES-16 Satellite Cloud and Moisture Imagery
- history :
- Created on 2024-10-24 by combining RGB imagery over time.
- institution :
- University of Washington
- references :
- https://registry.opendata.aws/noaa-goes/
- comment :
- Truncated longitude to match RGB data, latitude and longitude represent cloud imagery grids.
- time_units :
- Julian days
- lat_units :
- degrees_north
- lon_units :
- degrees_east
- rgb_units :
- RGB intensity [0-255]
"
],
"text/plain": [
"\n",
@@ -718,7 +718,7 @@
" rgb_units: RGB intensity [0-255]"
]
},
- "execution_count": 7,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -729,7 +729,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -775,7 +775,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -825,7 +825,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -1213,7 +1213,7 @@
" time_units: Julian days\n",
" lat_units: degrees_north\n",
" lon_units: degrees_east\n",
- " cod_units: dimensionless- time: 10
- latitude: 1500
- longitude: 2500
time
(time)
int32
91 92 93 94 95 96 97 98 99 100
array([ 91, 92, 93, 94, 95, 96, 97, 98, 99, 100])
lat
(time, latitude, longitude)
float32
57.3 57.3 57.3 ... 14.64 14.64
array([[[57.29578 , 57.29578 , 57.29578 , ..., 51.360435 ,\n",
+ " cod_units: dimensionless
- time: 10
- latitude: 1500
- longitude: 2500
time
(time)
int32
91 92 93 94 95 96 97 98 99 100
array([ 91, 92, 93, 94, 95, 96, 97, 98, 99, 100])
lat
(time, latitude, longitude)
float32
57.3 57.3 57.3 ... 14.64 14.64
array([[[57.29578 , 57.29578 , 57.29578 , ..., 51.360435 ,\n",
" 51.362617 , 51.3643 ],\n",
" [57.29578 , 57.29578 , 57.29578 , ..., 51.322495 ,\n",
" 51.32386 , 51.326122 ],\n",
@@ -1253,7 +1253,7 @@
" [15.140513 , 15.13982 , 15.139026 , ..., 14.657232 ,\n",
" 14.657422 , 14.657647 ],\n",
" [15.120589 , 15.119889 , 15.119103 , ..., 14.638046 ,\n",
- " 14.638264 , 14.638501 ]]], dtype=float32)lon
(time, latitude, longitude)
float32
0.2618 0.2618 ... -61.93 -61.91
array([[[ 0.26179945, 0.26179945, 0.26179945, ...,\n",
+ " 14.638264 , 14.638501 ]]], dtype=float32)
lon
(time, latitude, longitude)
float32
0.2618 0.2618 ... -61.93 -61.91
array([[[ 0.26179945, 0.26179945, 0.26179945, ...,\n",
" -53.017624 , -52.982132 , -52.947044 ],\n",
" [ 0.26179945, 0.26179945, 0.26179945, ...,\n",
" -53.03895 , -53.00418 , -52.968655 ],\n",
@@ -1293,7 +1293,7 @@
" [-113.080055 , -113.052925 , -113.02549 , ...,\n",
" -61.947376 , -61.927856 , -61.908302 ],\n",
" [-113.07482 , -113.047676 , -113.02028 , ...,\n",
- " -61.94878 , -61.929237 , -61.909668 ]]], dtype=float32)
CloudOpticalDepth
(time, latitude, longitude)
float32
nan nan nan ... 0.1245 0.1343 nan
array([[[ nan, nan, nan, ..., 30.90127 ,\n",
+ " -61.94878 , -61.929237 , -61.909668 ]]], dtype=float32)
CloudOpticalDepth
(time, latitude, longitude)
float32
nan nan nan ... 0.1245 0.1343 nan
array([[[ nan, nan, nan, ..., 30.90127 ,\n",
" 30.925686 , 39.534874 ],\n",
" [ nan, nan, nan, ..., 30.881737 ,\n",
" 39.537315 , 50.04609 ],\n",
@@ -1333,7 +1333,7 @@
" [ 1.8287809 , nan, nan, ..., 0.12452313,\n",
" 0.1294064 , nan],\n",
" [ 2.302457 , 1.4088205 , nan, ..., 0.12452313,\n",
- " 0.13428965, nan]]], dtype=float32)
PandasIndex
PandasIndex(Index([91, 92, 93, 94, 95, 96, 97, 98, 99, 100], dtype='int32', name='time'))
- title :
- Cloud Optical Depth
- description :
- Combined cloud optical depth data over time for Julian days 91 to 100.
- source :
- GOES-16 Satellite Cloud and Moisture Imagery
- history :
- Created on 2024-10-24 by combining RGB imagery over time.
- institution :
- University of Washington
- references :
- https://registry.opendata.aws/noaa-goes/
- time_units :
- Julian days
- lat_units :
- degrees_north
- lon_units :
- degrees_east
- cod_units :
- dimensionless
"
+ " 0.13428965, nan]]], dtype=float32)
PandasIndex
PandasIndex(Index([91, 92, 93, 94, 95, 96, 97, 98, 99, 100], dtype='int32', name='time'))
- title :
- Cloud Optical Depth
- description :
- Combined cloud optical depth data over time for Julian days 91 to 100.
- source :
- GOES-16 Satellite Cloud and Moisture Imagery
- history :
- Created on 2024-10-24 by combining RGB imagery over time.
- institution :
- University of Washington
- references :
- https://registry.opendata.aws/noaa-goes/
- time_units :
- Julian days
- lat_units :
- degrees_north
- lon_units :
- degrees_east
- cod_units :
- dimensionless
"
],
"text/plain": [
"\n",
@@ -1358,7 +1358,7 @@
" cod_units: dimensionless"
]
},
- "execution_count": 15,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -1369,7 +1369,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -1411,7 +1411,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@@ -1461,7 +1461,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
@@ -1849,19 +1849,19 @@
" time_units: Julian days\n",
" lat_units: degrees_north\n",
" lon_units: degrees_east\n",
- " rsr_units: W/m2PandasIndex
PandasIndex(Index([91, 92, 93, 94, 95, 96, 97, 98, 99, 100], dtype='int32', name='time'))
- title :
- Reflected SW Radiation (RSR)
- description :
- Combined RSR data over time for Julian days 91 to 100.
- source :
- GOES-16 Satellite Cloud and Moisture Imagery
- history :
- Created on 2024-10-24 by combining RSR over time.
- institution :
- University of Washington
- references :
- https://registry.opendata.aws/noaa-goes/
- time_units :
- Julian days
- lat_units :
- degrees_north
- lon_units :
- degrees_east
- rsr_units :
- W/m2
"
],
"text/plain": [
"\n",
@@ -1926,7 +1926,7 @@
" rsr_units: W/m2"
]
},
- "execution_count": 19,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -1937,7 +1937,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -1979,7 +1979,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -2010,11 +2010,6 @@
"source": [
"The above three sections save the datasets as usaeable netcdf files, we have added the metadata along with a data citation. "
]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": []
}
],
"metadata": {