-
Notifications
You must be signed in to change notification settings - Fork 13
Boyce index code #100
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Boyce index code #100
Changes from 3 commits
1c638af
91b8d25
be80087
7608ee5
b4658b9
6634ca3
9c8d31d
0220a95
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -131,3 +131,7 @@ dmypy.json | |
| # conda smithy | ||
| build_artifacts | ||
| package/ | ||
|
|
||
|
|
||
| #poetry lock file | ||
| poetry.lock | ||
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,180 @@ | ||
| import geopandas as gpd | ||
| import numpy as np | ||
| import pandas as pd | ||
| import matplotlib.pyplot as plt | ||
| from scipy.stats import spearmanr | ||
|
|
||
|
|
||
| # implement Boyce index as describe in https://www.whoi.edu/cms/files/hirzel_etal_2006_53457.pdf (Eq.4) | ||
|
|
||
|
|
||
| def boycei(interval, obs, fit): | ||
| """ | ||
| Calculate the Boyce index for a given interval. | ||
|
|
||
| Args: | ||
| interval (tuple or list): Two elements representing the lower and upper bounds of the interval. | ||
| obs (numpy.ndarray): Observed suitability values (i.e., predictions at presence points). | ||
| fit (numpy.ndarray): Suitability values (e.g., from a raster), i.e., predictions at presence + background points. | ||
|
||
|
|
||
| Returns: | ||
| float: The ratio of observed to expected frequencies, representing the Boyce index for the given interval. | ||
| """ | ||
| # Boolean arrays for classification | ||
| fit_bin = (fit >= interval[0]) & (fit <= interval[1]) | ||
| obs_bin = (obs >= interval[0]) & (obs <= interval[1]) | ||
|
|
||
| # Compute pi and ei | ||
| pi = np.sum(obs_bin) / len(obs_bin) | ||
| ei = np.sum(fit_bin) / len(fit_bin) | ||
|
|
||
| if ei == 0: | ||
| fi = np.nan # Avoid division by zero | ||
| else: | ||
| fi = pi / ei | ||
|
|
||
| return fi | ||
|
|
||
|
|
||
| def boyce_index(fit, obs, nclass=0, window="default", res=100, PEplot=False): | ||
|
||
| """ | ||
| Compute the Boyce index to evaluate habitat suitability models. | ||
|
|
||
| The Boyce index evaluates how well a model predicts species presence by comparing its predictions | ||
| to a random distribution of observed presences along the prediction gradients. It is specifically | ||
| designed for presence-only models and serves as an appropriate metric in such cases. | ||
|
|
||
| It divides the probability of species presence into ranges and, for each range, calculates the predicted-to-expected ratio (F ratio). | ||
| The final output is given by the Spearman correlation between the mid-point of the probability interval and the F ratio. | ||
|
|
||
| Index ranges from -1 to +1: | ||
| - Positive values: Model predictions align with actual species presence distribution. | ||
| - Values near zero: Model performs similarly to random predictions. | ||
| - Negative values: Model incorrectly predicts low-quality areas where species are more frequently found. | ||
|
|
||
| This calculation is based on the continuous Boyce index (Eq. 4) as defined in Hirzel et al. 2006. | ||
|
|
||
| Args: | ||
| fit (numpy.ndarray | pd.Series | gpd.GeoSeries): Suitability values (e.g., predictions at presence + background points). | ||
| obs (numpy.ndarray | pd.Series | gpd.GeoSeries): Observed suitability values, i.e., predictions at presence points. | ||
| nclass (int | list, optional): Number of classes or list of class thresholds. Defaults to 0. | ||
| window (float | str, optional): Width of the moving window. Defaults to 'default' which sets window as 1/10th of the fit range. | ||
| res (int, optional): Resolution, i.e., number of steps if nclass=0. Defaults to 100. | ||
| PEplot (bool, optional): Whether to plot the predicted-to-expected (P/E) curve. Defaults to False. | ||
|
|
||
| Returns: | ||
| dict: A dictionary with the following keys: | ||
| - 'F.ratio' (numpy.ndarray): The P/E ratio for each bin. | ||
| - 'Spearman.cor' (float): The Spearman's rank correlation coefficient between interval midpoints and F ratios. | ||
| - 'HS' (numpy.ndarray): The habitat suitability intervals. | ||
|
|
||
| Example: | ||
| # Predicted suitability scores (e.g., predictions at presence + background points) | ||
| predicted = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]) | ||
|
|
||
| # Observed presence suitability scores (e.g., predictions at presence points) | ||
| observed = np.array([0.3, 0.7, 0.8, 0.9]) | ||
|
||
|
|
||
| # Call the boyce_index function to calculate the Boyce index and Spearman correlation | ||
| results = boyce_index(fit=predicted, obs=observed, nclass=3) | ||
| print(results) | ||
|
|
||
| # Output: | ||
| # {'F.ratio': array([0.625, 0.625, 1.875]), | ||
| # 'Spearman.cor': 0.866, | ||
| # 'HS': array([[0.1 , 0.4 ], | ||
| # [0.4 , 0.7 ], | ||
| # [0.7 , 1. ]])} | ||
| """ | ||
|
|
||
|
|
||
| # Check types of fit and obs | ||
| acceptable_types = (np.ndarray, pd.Series, gpd.GeoSeries) | ||
| if not isinstance(fit, acceptable_types): | ||
| raise TypeError("The 'fit' parameter must be a NumPy array, Pandas Series, or GeoPandas GeoSeries.") | ||
| if not isinstance(obs, acceptable_types): | ||
| raise TypeError("The 'obs' parameter must be a NumPy array, Pandas Series, or GeoPandas GeoSeries.") | ||
|
|
||
|
|
||
| # Convert inputs to NumPy arrays | ||
| fit = np.asarray(fit) | ||
| obs = np.asarray(obs) | ||
|
|
||
|
|
||
| # Ensure fit and obs are one-dimensional arrays | ||
| if fit.ndim != 1 or obs.ndim != 1: | ||
| raise ValueError("Both 'fit' and 'obs' must be one-dimensional arrays.") | ||
|
|
||
|
|
||
| # Remove NaNs from fit and obs | ||
| fit = fit[~np.isnan(fit)] | ||
| obs = obs[~np.isnan(obs)] | ||
|
|
||
| if len(fit) == 0 or len(obs) == 0: | ||
| raise ValueError("After removing NaNs, 'fit' or 'obs' arrays cannot be empty.") | ||
|
|
||
|
|
||
| # Remove NaNs from fit | ||
| fit = fit[~np.isnan(fit)] | ||
|
||
|
|
||
| if window == "default": | ||
| window = (np.max(fit) - np.min(fit)) / 10.0 | ||
|
|
||
| mini = np.min(fit) | ||
| maxi = np.max(fit) | ||
|
||
|
|
||
| if nclass == 0: | ||
| vec_mov = np.linspace(mini, maxi - window, num=res+1) | ||
| intervals = np.column_stack((vec_mov, vec_mov + window)) | ||
| elif isinstance(nclass, (list, np.ndarray)) and len(nclass) > 1: | ||
| nclass.sort() | ||
| if mini > nclass[0] or maxi < nclass[-1]: | ||
| raise ValueError(f"The range provided via nclass is: ({nclass[0], nclass[-1]}). The range computed via fit is: ({mini, maxi}). Provided range via nclass should be in range computed via (max(fit), min(fit)).") | ||
| vec_mov = np.concatenate(([mini], nclass)) | ||
| intervals = np.column_stack((vec_mov[:-1], vec_mov[1:])) | ||
| print(vec_mov) | ||
| print(intervals) | ||
|
||
| elif nclass > 0: | ||
| vec_mov = np.linspace(mini, maxi, num=nclass + 1) | ||
| intervals = np.column_stack((vec_mov[:-1], vec_mov[1:])) | ||
| else: | ||
| raise ValueError("Invalid nclass value.") | ||
|
||
|
|
||
|
|
||
| # Apply boycei function to each interval | ||
| f_list = [] | ||
| for inter in intervals: | ||
| fi = boycei(inter, obs, fit) | ||
| f_list.append(fi) | ||
| f = np.array(f_list) | ||
|
|
||
|
|
||
| # Remove NaNs | ||
| valid = ~np.isnan(f) | ||
|
||
|
|
||
| # use interval midpoints to calculate the spearmanr coeff. | ||
| intervals_mid = np.mean(intervals[valid], axis=1) | ||
| if np.sum(valid) <= 2: | ||
| corr = np.nan | ||
| else: | ||
| f_valid = f[valid] | ||
| corr, _ = spearmanr(f_valid, intervals_mid) | ||
|
|
||
|
|
||
| if PEplot: | ||
|
||
| plt.figure() | ||
| plt.plot(intervals_mid, f[valid], marker='o') | ||
| plt.xlabel('Habitat suitability') | ||
| plt.ylabel('Predicted/Expected ratio') | ||
| plt.title('Boyce Index') | ||
| plt.show() | ||
|
|
||
|
|
||
| results = { | ||
| 'F.ratio': f, | ||
| 'Spearman.cor': round(corr, 3) if not np.isnan(corr) else np.nan, | ||
|
||
| 'HS': intervals, | ||
| } | ||
|
|
||
| return results | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,150 @@ | ||
| import numpy as np | ||
| import pytest | ||
| import matplotlib.pyplot as plt | ||
| from elapid.evaluate import boycei, boyce_index | ||
| import pandas as pd | ||
| import geopandas as gpd | ||
| from shapely.geometry import Point | ||
|
|
||
|
|
||
|
|
||
| # Test Case 1: Normal case with random data | ||
| def test_normal_case(): | ||
| np.random.seed(0) | ||
| fit = np.random.rand(1000) | ||
| obs = np.random.choice(fit, size=100, replace=False) | ||
| results = boyce_index(fit, obs, nclass=10, PEplot=False) | ||
| assert 'Spearman.cor' in results | ||
| assert 'F.ratio' in results | ||
| spearman_cor = results['Spearman.cor'] | ||
| f_ratio = results['F.ratio'] | ||
| assert not np.isnan(spearman_cor) | ||
| assert -1 <= spearman_cor <= 1 | ||
| assert len(f_ratio) == 10 | ||
| assert not np.any(np.isnan(f_ratio)) | ||
| assert np.all(f_ratio >= 0) | ||
|
|
||
| # Test Case 2: Edge case with empty 'fit' array | ||
| def test_empty_fit(): | ||
| fit = np.array([]) | ||
| obs = np.array([0.5, 0.6, 0.7]) | ||
| with pytest.raises(ValueError): | ||
| boyce_index(fit, obs, nclass=10, PEplot=False) | ||
|
|
||
| # Test Case 3: Edge case with empty 'obs' array | ||
| def test_empty_obs(): | ||
| fit = np.random.rand(1000) | ||
| obs = np.array([]) | ||
| with pytest.raises(ValueError) as exc_info: | ||
| boyce_index(fit, obs, nclass=10, PEplot=False) | ||
| assert "After removing NaNs, 'fit' or 'obs' arrays cannot be empty." in str(exc_info.value) | ||
|
|
||
| # Test Case 4: 'obs' containing NaNs | ||
| def test_obs_with_nans(): | ||
| fit = np.random.rand(1000) | ||
| obs = np.random.choice(fit, size=100, replace=False) | ||
| obs[::10] = np.nan # Introduce NaNs into 'obs' | ||
| results = boyce_index(fit, obs, nclass=10, PEplot=False) | ||
| spearman_cor = results['Spearman.cor'] | ||
| assert 'Spearman.cor' in results | ||
| if not np.isnan(spearman_cor): | ||
| assert -1 <= spearman_cor <= 1 | ||
| f_ratio = results['F.ratio'] | ||
| assert len(f_ratio) == 10 | ||
|
|
||
| # Test Case 5: Invalid 'nclass' value (negative number) | ||
| def test_invalid_nclass(): | ||
| fit = np.random.rand(1000) | ||
| obs = np.random.choice(fit, size=100, replace=False) | ||
| with pytest.raises(ValueError): | ||
| boyce_index(fit, obs, nclass=-5, PEplot=False) | ||
|
|
||
| # Test Case 6: Custom 'window' value | ||
| def test_custom_window(): | ||
| fit = np.random.rand(1000) | ||
| obs = np.random.choice(fit, size=100, replace=False) | ||
| results = boyce_index(fit, obs, window=0.1, PEplot=False) | ||
| assert 'Spearman.cor' in results | ||
| spearman_cor = results['Spearman.cor'] | ||
| assert not np.isnan(spearman_cor) | ||
| assert -1 <= spearman_cor <= 1 | ||
| f_ratio = results['F.ratio'] | ||
| assert len(f_ratio) > 0 | ||
|
|
||
| # Test Case 7: 'PEplot' set to True | ||
| def test_peplot_true(): | ||
| fit = np.random.rand(1000) | ||
| obs = np.random.choice(fit, size=100, replace=False) | ||
| results = boyce_index(fit, obs, nclass=10, PEplot=True) | ||
| assert 'Spearman.cor' in results | ||
| spearman_cor = results['Spearman.cor'] | ||
| assert not np.isnan(spearman_cor) | ||
| assert -1 <= spearman_cor <= 1 | ||
| plt.close('all') # Close the plot to avoid display during testing | ||
|
|
||
| # Test Case 8: 'fit' containing NaNs | ||
| def test_fit_with_nans(): | ||
| # In this code snippet: | ||
| fit = np.random.rand(1000) | ||
| fit[::50] = np.nan # Introduce NaNs into 'fit' | ||
| obs = np.random.choice(fit[~np.isnan(fit)], size=100, replace=False) | ||
| results = boyce_index(fit, obs, nclass=10, PEplot=False) | ||
| assert 'Spearman.cor' in results | ||
| spearman_cor = results['Spearman.cor'] | ||
| assert not np.isnan(spearman_cor) | ||
| assert -1 <= spearman_cor <= 1 | ||
| f_ratio = results['F.ratio'] | ||
| assert len(f_ratio) == 10 | ||
|
|
||
| # Test Case 9: 'obs' values outside the range of 'fit' | ||
| def test_obs_outside_fit_range(): | ||
| fit = np.random.rand(1000) | ||
| obs = np.array([1.5, 2.0, 2.5]) # Values outside the range [0, 1] | ||
| results = boyce_index(fit, obs, nclass=10, PEplot=False) | ||
| spearman_cor = results['Spearman.cor'] | ||
| assert 'Spearman.cor' in results | ||
| assert np.isnan(spearman_cor) or -1 <= spearman_cor <= 1 | ||
| f_ratio = results['F.ratio'] | ||
| assert len(f_ratio) == 10 | ||
|
|
||
| # Test Case 10: Large dataset | ||
| def test_large_dataset(): | ||
| fit = np.random.rand(1000000) | ||
| obs = np.random.choice(fit, size=10000, replace=False) | ||
| results = boyce_index(fit, obs, nclass=20, PEplot=False) | ||
| assert 'Spearman.cor' in results | ||
| spearman_cor = results['Spearman.cor'] | ||
| assert not np.isnan(spearman_cor) | ||
| assert -1 <= spearman_cor <= 1 | ||
| f_ratio = results['F.ratio'] | ||
| assert len(f_ratio) == 20 | ||
|
|
||
|
|
||
| # Test Case 11: Using Pandas Series | ||
| def test_with_pandas_series(): | ||
| np.random.seed(0) | ||
| fit = pd.Series(np.random.rand(1000)) | ||
| obs = fit.sample(n=100, replace=False) | ||
| results = boyce_index(fit, obs, nclass=10, PEplot=False) | ||
| assert 'Spearman.cor' in results | ||
| spearman_cor = results['Spearman.cor'] | ||
| assert not np.isnan(spearman_cor) | ||
| assert -1 <= spearman_cor <= 1 | ||
|
|
||
| # Test Case 12: Using GeoPandas GeoSeries | ||
| def test_with_geopandas_geoseries(): | ||
| np.random.seed(0) | ||
| num_points = 1000 | ||
| x = np.random.uniform(-180, 180, num_points) | ||
| y = np.random.uniform(-90, 90, num_points) | ||
| suitability = np.random.rand(num_points) | ||
| geometry = [Point(xy) for xy in zip(x, y)] | ||
| gdf = gpd.GeoDataFrame({'suitability': suitability}, geometry=geometry) | ||
|
|
||
| fit = gdf['suitability'] # This is a Pandas Series | ||
| obs = fit.sample(n=100, replace=False) | ||
| results = boyce_index(fit, obs, nclass=10, PEplot=False) | ||
| assert 'Spearman.cor' in results | ||
| spearman_cor = results['Spearman.cor'] | ||
| assert not np.isnan(spearman_cor) | ||
| assert -1 <= spearman_cor <= 1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i might prefer renaming these functions
boyce_indexandcontinuous_boyce_indexto differentiate (boycei/boyce_index are easily confused)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done as per suggestion.