-
Notifications
You must be signed in to change notification settings - Fork 90
Open
Labels
Description
Hi,
I am struggling with creating a mixture of Gaussian distribution. I tried out several ways but none of it seemed to be correct.
Here is one possibility. But I think I am missing something, maybe you can help out?
from chaospy import Dist
from scipy import special
import numpy as np
import numpy
import matplotlib.pyplot as plt
import seaborn as sns
# Define MoG class
class MoG(Dist):
def __init__(self, loc=[[-5, -5],[5,5]], scale=[[[1, .9], [.9, 1]], [[1, -.9], [-.9, 1]]], a=[0.7,0.3]):
loc = numpy.asfarray(loc)
scale = numpy.asfarray(scale)
a = numpy.asfarray(a)
assert np.isclose(np.sum(a),1)
assert len(loc) == len(scale)
self._repr = {"loc": loc.tolist(), "scale": scale.tolist()}
C = np.array([numpy.linalg.cholesky(scale[i]) for i in range(len(a))])
Ci = np.array([ numpy.linalg.inv(C[i]) for i in range(len(a))])
Dist.__init__(self, C=C, Ci=Ci, loc=loc, a=a)
def _cdf(self, x, C, Ci, loc, a):
"""
taking the weighted sum over all components
"""
out = a[0]* special.ndtr(numpy.dot(Ci[0], (x.T-loc[0].T).T))
for i in range(1, len(a)):
out += a[i]* special.ndtr(numpy.dot(Ci[i], (x.T-loc[i].T).T))
return out
def _bnd(self, x, C, Ci, loc, a):
"""
boundaries are not yet correctly scaled.
But it should work for distributions in right range.
"""
scale = numpy.sqrt(numpy.diag(numpy.dot(C[0],C[0].T)))
lo,up = numpy.zeros((2,)+x.shape)
lo.T[:] = (-20+loc[0])# (-7.5*scale+loc[0])
up.T[:] = (20+loc[0]) #(7.5*scale+loc[0])
return lo, up
def __len__(self):
return len(self.prm["C"])
# create with np for comparison
def np_MoG(loc,scale,a,n=1):
choices = np.random.choice(len(a), size=n, replace=True, p=a)
counts = [sum(choices==i) for i in range(len(a)) ]
samples=[]
for i in range(0,len(a)):
samples.append(np.random.multivariate_normal(loc[i], scale[i], counts[i]))
samples = np.array(samples)
samples = np.vstack(samples)
np.random.shuffle(samples)
return samples
And creating samples:
# create samples with cp
loc=[[-5, -5],[5,5]]
scale=[[[1, 0.9], [0.9, 1]], [[1, 0], [0, 1]]]
a=[0.5,0.5]
R = MoG(loc=loc, scale=scale, a=a)
samples = R.sample(10000)
# reference samples with np
samples_np = np_MoG(loc,scale,a,n=10000)
# and plot everything
plt.figure(1, figsize=(12,8))
plt.subplot(2,2,1)
sns.distplot(samples[0], bins=np.arange(-10,20,0.5))
sns.distplot(samples_np[:,0], bins=np.arange(-10,20,0.5))
plt.subplot(2,2,4)
sns.distplot(samples[1], bins=np.arange(-10,20,0.5),label='cp samples')
sns.distplot(samples_np[:,1], bins=np.arange(-10,20,0.5), label='np samples')
plt.legend()
plt.subplot(2,2,2)
sns.kdeplot(samples[1], samples[0])
sns.kdeplot(samples_np[:,1], samples_np[:,0])
plt.xlim(-10,20)
