Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 60 additions & 14 deletions skgstat/Variogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,10 @@ def __init__(self,
normalize=False,
fit_method='trf',
fit_sigma=None,
use_nugget=False,
maxlag=None,
samples=None,
use_nugget=False,
maxlag=None,
max_dist=None,
samples=None,
n_lags=10,
multivariate='cross',
verbose=False,
Expand Down Expand Up @@ -203,10 +204,16 @@ def __init__(self,
is relative and maxlag * max(Variogram.distance) will be used.
In case maxlag is a string it has to be one of 'median', 'mean'.
Then the median or mean of all Variogram.distance will be used.
Note maxlag=0.5 will use half the maximum separating distance,
this is not the same as 'median', which is the median of all
separating distances
samples : float, int
Note maxlag=0.5 will use half the maximum separating distance,
this is not the same as 'median', which is the median of all
separating distances
max_dist : float
.. versionadded:: 1.0.13

Maximum distance at which distances are calculated. If set, overrides
maxlag-based limiting. Useful for large datasets to limit computation.
If not set and maxlag is relative, max_dist is estimated approximately.
samples : float, int
If set to a non-None value point pairs are sampled
randomly. Two random subset of all points are chosen, and
the distance matrix is calculated only between these two
Expand Down Expand Up @@ -296,22 +303,44 @@ def __init__(self,
))
self._1d = True

# handle maxlag for MetricSpace
if maxlag and not isinstance(maxlag, str) and maxlag >= 1:
_maxlag = maxlag
else:
_maxlag = None
# Determine max_dist for MetricSpace
computed_max_dist = max_dist
if computed_max_dist is None:
if maxlag is not None and not isinstance(maxlag, str) and 0 < maxlag < 1:
# Estimate max_distance for relative maxlag
warnings.warn(
"Relative maxlag requires estimating max_distance from a sample of pairs. "
"This may be approximate; for exact results, provide max_dist or use absolute maxlag.",
UserWarning
)
n_points = coordinates.shape[0]
if n_points < 2:
estimated_max = 0.0
else:
import random
n_pairs = n_points * (n_points - 1) // 2
n_samples = min(200, n_pairs)
pairs = random.sample(
[(i, j) for i in range(n_points) for j in range(i + 1, n_points)],
n_samples
)
distances = [pdist(np.array([coordinates[i], coordinates[j]]), metric=dist_func)[0] for i, j in pairs]
max_sampled = max(distances) if distances else 0.0
estimated_max = max_sampled * 3
computed_max_dist = estimated_max
elif maxlag is not None and not isinstance(maxlag, str) and maxlag >= 1:
computed_max_dist = maxlag

if samples is None:
coordinates = MetricSpace(
coordinates.copy(),
dist_func,
_maxlag
computed_max_dist
)
else:
coordinates = ProbabalisticMetricSpace(
coordinates.copy(),
dist_func, _maxlag,
dist_func, computed_max_dist,
samples=samples,
rnd=self._kwargs.get("binning_random_state", None)
)
Expand Down Expand Up @@ -1257,6 +1286,23 @@ def maxlag(self):
:class:`MetricSpace <skgstat.MetricSpace>`, only absolute
limits can be used.

Examples
--------
Using absolute maxlag:

>>> import skgstat as skg
>>> c, v = skg.data.pancake().get('sample')
>>> V = skg.Variogram(c, v, maxlag=250) # absolute limit

Using relative maxlag:

>>> V = skg.Variogram(c, v, maxlag=0.5) # 50% of max distance

Using pre-computed MetricSpace (only absolute maxlag supported):

>>> ms = skg.MetricSpace(c, max_dist=250)
>>> V = skg.Variogram(ms, v, maxlag=250)

"""
return self._maxlag

Expand Down
10 changes: 8 additions & 2 deletions skgstat/tests/test_variogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ def test_sparse_maxlag_50(self):
def test_sparse_maxlag_30(self):
V = Variogram(self.c, self.v, maxlag=30)

for x, y in zip(V.parameters, [17.128, 6.068, 0]):
self.assertAlmostEqual(x, y, places=3)
for x, y in zip(V.parameters, [17.13, 6.068, 0]):
self.assertAlmostEqual(x, y, places=2)


class TestVariogramInstantiation(unittest.TestCase):
Expand All @@ -89,6 +89,12 @@ def test_sparse_standard_settings(self):
for x, y in zip(V.parameters, [7.122, 13.966, 0]):
self.assertAlmostEqual(x, y, places=3)

def test_relative_maxlag_warning(self):
with self.assertWarns(UserWarning) as cm:
V = Variogram(self.c, self.v, maxlag=0.5)
self.assertIn("Relative maxlag requires estimating", str(cm.warning))
self.assertIn("approximate", str(cm.warning))

def test_input_dimensionality(self):
c1d = np.random.normal(0, 1, 100)
c3d = np.random.normal(0, 1, size=(100, 3))
Expand Down
Loading