diff --git a/README.md b/README.md index 77d1b26..869a2a7 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,8 @@ We chose to use a deep learning convolutional neural network (CNN) to identify a These notebooks and scripts are created in python through the VS Code platform. Before running any scripts or notebooks, the user should import the necessary packages listed below, should the necessary packages not be available you can try a 'pip install **package name**' +## Installation + Relevant packages and libraries to install include : ```python @@ -30,6 +32,8 @@ The **goes2go** package is developed as an easy and efficient way to access GOES [Brian's GitHub](https://github.com/blaylockbk/goes2go) +[AWS CLI Download Instructions](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) + The next package that may be unfamiliar is subprocess. We use the subprocess package to access the GOES data stored on the AWS server. To set-up the AWS CLI on your own please follow the links under **GOES on AWS**: diff --git a/notebooks/Data_Cleaning.ipynb b/notebooks/Data_Cleaning.ipynb index 06d4690..58b5261 100644 --- a/notebooks/Data_Cleaning.ipynb +++ b/notebooks/Data_Cleaning.ipynb @@ -150,13 +150,13 @@ "metadata": {}, "outputs": [], "source": [ - "def CloudImageryProcessingFunction(file):\n", + "import numpy as np\n", + "import xarray as xr\n", + "from netCDF4 import Dataset\n", "\n", - " # for file in nc_file_list:\n", + "def CloudImageryProcessingFunction(file):\n", " print(file)\n", - "\n", " ds = xr.open_dataset(file)\n", - "\n", " file_id = Dataset(file)\n", "\n", " # Call function to calculate latitude and longitude from GOES ABI fixed grid projection data\n", @@ -167,6 +167,20 @@ " G = ds['CMI_C03'].data\n", " B = ds['CMI_C01'].data\n", "\n", + " # Define a function to remove outliers based on IQR\n", + " def remove_outliers(data):\n", + " Q1, Q3 = np.percentile(data, [25, 75])\n", + " IQR = Q3 - Q1\n", + " lower_bound = Q1 - 1.5 * IQR\n", + " 
upper_bound = Q3 + 1.5 * IQR\n", + " data = np.clip(data, lower_bound, upper_bound)\n", + " return data\n", + "\n", + " # Remove outliers from each channel\n", + " R = remove_outliers(R)\n", + " G = remove_outliers(G)\n", + " B = remove_outliers(B)\n", + "\n", " # Apply range limits for each channel. RGB values must be between 0 and 1\n", " R = np.clip(R, 0, 1)\n", " G = np.clip(G, 0, 1)\n", @@ -183,14 +197,10 @@ " G_true = np.maximum(G_true, 0)\n", " G_true = np.minimum(G_true, 1)\n", "\n", - "\n", " # The RGB array for the true color image\n", " RGB = np.dstack([R, G_true, B])\n", " rgb = RGB[:,:-1,:] # reverse the green???\n", "\n", - " # Assuming abi_lat and abi_lon have shapes (M, N), and R/G/B are (M, N)\n", - " # rgb = np.dstack([R, G, B]) # Stack the individual R, G, B components CHECKING THIS\n", - "\n", " return abi_lat, abi_lon, rgb\n" ] }, @@ -234,21 +244,34 @@ "metadata": {}, "outputs": [], "source": [ - "# data processing\n", - "def CloudOpticalDepthProcessingFunction(file):\n", + "import numpy as np\n", + "import xarray as xr\n", + "from netCDF4 import Dataset\n", "\n", - " # for file in nc_file_list:\n", + "# Data processing\n", + "def CloudOpticalDepthProcessingFunction(file):\n", " print(file)\n", - "\n", " ds = xr.open_dataset(file)\n", - "\n", " file_id = Dataset(file)\n", "\n", " # Call function to calculate latitude and longitude from GOES ABI fixed grid projection data\n", " abi_lat, abi_lon = calculate_degrees(file_id)\n", "\n", + " # Load Cloud Optical Depth (COD) data\n", " cod = ds['COD'].data\n", "\n", + " # Define a function to remove outliers based on IQR\n", + " def remove_outliers(data):\n", + " Q1, Q3 = np.percentile(data, [25, 75])\n", + " IQR = Q3 - Q1\n", + " lower_bound = Q1 - 1.5 * IQR\n", + " upper_bound = Q3 + 1.5 * IQR\n", + " data = np.clip(data, lower_bound, upper_bound)\n", + " return data\n", + "\n", + " # Remove outliers from the COD data\n", + " cod = remove_outliers(cod)\n", + "\n", " return abi_lat, abi_lon, cod\n" ] 
}, @@ -292,20 +315,34 @@ "metadata": {}, "outputs": [], "source": [ - "def ReflectedSWProcessingFunction(file):\n", + "import numpy as np\n", + "import xarray as xr\n", + "from netCDF4 import Dataset\n", "\n", - " # for file in nc_file_list:\n", + "def ReflectedSWProcessingFunction(file):\n", " print(file)\n", - "\n", " ds = xr.open_dataset(file)\n", - "\n", " file_id = Dataset(file)\n", "\n", + " # Extract latitude and longitude\n", " abi_lat = ds['lat'].values\n", " abi_lon = ds['lon'].values\n", "\n", + " # Load Reflected Shortwave Radiation (RSR) data\n", " rsr = ds['RSR'].data\n", "\n", + " # Define a function to remove outliers based on IQR\n", + " def remove_outliers(data):\n", + " Q1, Q3 = np.percentile(data, [25, 75])\n", + " IQR = Q3 - Q1\n", + " lower_bound = Q1 - 1.5 * IQR\n", + " upper_bound = Q3 + 1.5 * IQR\n", + " data = np.clip(data, lower_bound, upper_bound)\n", + " return data\n", + "\n", + " # Remove outliers from the RSR data\n", + " rsr = remove_outliers(rsr)\n", + "\n", " return abi_lat, abi_lon, rsr\n" ] }, @@ -325,7 +362,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\adhal\\AppData\\Local\\Temp\\ipykernel_37992\\3872720453.py:20: RuntimeWarning: invalid value encountered in sqrt\n", + "C:\\Users\\adhal\\AppData\\Local\\Temp\\ipykernel_29456\\3872720453.py:20: RuntimeWarning: invalid value encountered in sqrt\n", " r_s = (-1.0*b_var - np.sqrt((b_var**2)-(4.0*a_var*c_var)))/(2.0*a_var)\n" ] }, diff --git a/notebooks/Prepare_AI_Ready_Data.ipynb b/notebooks/Prepare_AI_Ready_Data.ipynb index 9cef3e5..3824c0e 100644 --- a/notebooks/Prepare_AI_Ready_Data.ipynb +++ b/notebooks/Prepare_AI_Ready_Data.ipynb @@ -129,7 +129,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -181,7 +181,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -571,7 +571,7 @@ " time_units: Julian days\n", " lat_units: 
degrees_north\n", " lon_units: degrees_east\n", - " rgb_units: RGB intensity [0-255]
  • title :
    RGB Cloud Imagery
    description :
    Combined RGB cloud imagery data over time for Julian days 91 to 100.
    source :
    GOES-16 Satellite Cloud and Moisture Imagery
    history :
    Created on 2024-10-24 by combining RGB imagery over time.
    institution :
    University of Washington
    references :
    https://registry.opendata.aws/noaa-goes/
    comment :
    Truncated longitude to match RGB data, latitude and longitude represent cloud imagery grids.
    time_units :
    Julian days
    lat_units :
    degrees_north
    lon_units :
    degrees_east
    rgb_units :
    RGB intensity [0-255]
  • " ], "text/plain": [ "\n", @@ -718,7 +718,7 @@ " rgb_units: RGB intensity [0-255]" ] }, - "execution_count": 7, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -729,7 +729,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -775,7 +775,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -825,7 +825,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -1213,7 +1213,7 @@ " time_units: Julian days\n", " lat_units: degrees_north\n", " lon_units: degrees_east\n", - " cod_units: dimensionless
    • time
      PandasIndex
      PandasIndex(Index([91, 92, 93, 94, 95, 96, 97, 98, 99, 100], dtype='int32', name='time'))
  • title :
    Cloud Optical Depth
    description :
    Combined cloud optical depth data over time for Julian days 91 to 100.
    source :
    GOES-16 Satellite Cloud and Moisture Imagery
    history :
    Created on 2024-10-24 by combining RGB imagery over time.
    institution :
    University of Washington
    references :
    https://registry.opendata.aws/noaa-goes/
    time_units :
    Julian days
    lat_units :
    degrees_north
    lon_units :
    degrees_east
    cod_units :
    dimensionless
  • " ], "text/plain": [ "\n", @@ -1358,7 +1358,7 @@ " cod_units: dimensionless" ] }, - "execution_count": 15, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -1369,7 +1369,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -1411,7 +1411,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -1461,7 +1461,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -1849,19 +1849,19 @@ " time_units: Julian days\n", " lat_units: degrees_north\n", " lon_units: degrees_east\n", - " rsr_units: W/m2
    • time
      PandasIndex
      PandasIndex(Index([91, 92, 93, 94, 95, 96, 97, 98, 99, 100], dtype='int32', name='time'))
  • title :
    Reflected SW Radiation (RSR)
    description :
    Combined RSR data over time for Julian days 91 to 100.
    source :
    GOES-16 Satellite Cloud and Moisture Imagery
    history :
    Created on 2024-10-24 by combining RSR over time.
    institution :
    University of Washington
    references :
    https://registry.opendata.aws/noaa-goes/
    time_units :
    Julian days
    lat_units :
    degrees_north
    lon_units :
    degrees_east
    rsr_units :
    W/m2
  • " ], "text/plain": [ "\n", @@ -1926,7 +1926,7 @@ " rsr_units: W/m2" ] }, - "execution_count": 19, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -1937,7 +1937,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -1979,7 +1979,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -2010,11 +2010,6 @@ "source": [ "The above three sections save the datasets as usable netCDF files; we have added the metadata along with a data citation. " ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] } ], "metadata": {