Skip to content

Mixture of Gaussian distribution #187

@coschroeder

Description

@coschroeder

Hi,
I am struggling with creating a mixture of Gaussian distribution. I tried out several ways but none of it seemed to be correct.
Here is one possibility. But I think I am missing something, maybe you can help out?

from chaospy import Dist
from scipy import special
import numpy as np
import numpy

import matplotlib.pyplot as plt
import seaborn as sns

# Define MoG class
class MoG(Dist):

    def __init__(self, loc=[[-5, -5],[5,5]], scale=[[[1, .9], [.9, 1]], [[1, -.9], [-.9, 1]]], a=[0.7,0.3]):
        loc = numpy.asfarray(loc)        
        scale = numpy.asfarray(scale)
        a = numpy.asfarray(a)
        assert np.isclose(np.sum(a),1)
        assert len(loc) == len(scale)
        self._repr = {"loc": loc.tolist(), "scale": scale.tolist()}

        C = np.array([numpy.linalg.cholesky(scale[i]) for i in range(len(a))])
        Ci = np.array([ numpy.linalg.inv(C[i]) for i in range(len(a))])
        Dist.__init__(self, C=C, Ci=Ci, loc=loc, a=a)

    def _cdf(self, x, C, Ci, loc, a):
        """
        taking the weighted sum over all components
        """
        out = a[0]* special.ndtr(numpy.dot(Ci[0], (x.T-loc[0].T).T))
        for i in range(1, len(a)):
            out += a[i]* special.ndtr(numpy.dot(Ci[i], (x.T-loc[i].T).T))
        return out
    
    def _bnd(self, x, C, Ci, loc, a):
        """
        boundaries are not yet correctly scaled. 
        But it should work for distributions in right range.
        """
        scale = numpy.sqrt(numpy.diag(numpy.dot(C[0],C[0].T)))
        lo,up = numpy.zeros((2,)+x.shape)
        lo.T[:] = (-20+loc[0])# (-7.5*scale+loc[0])
        up.T[:] = (20+loc[0]) #(7.5*scale+loc[0])
        return lo, up
    
    def __len__(self):
        return len(self.prm["C"])
    
    
# create with np for comparison
def np_MoG(loc,scale,a,n=1):
    choices = np.random.choice(len(a), size=n, replace=True, p=a)
    counts = [sum(choices==i) for i in range(len(a)) ]
    samples=[]
    for i in range(0,len(a)):
        samples.append(np.random.multivariate_normal(loc[i], scale[i], counts[i]))
    
    samples = np.array(samples)
    samples = np.vstack(samples)
    np.random.shuffle(samples)
    
    return samples

And creating samples:

# create samples with cp
loc=[[-5, -5],[5,5]] 
scale=[[[1, 0.9], [0.9, 1]], [[1, 0], [0, 1]]]
a=[0.5,0.5]

R = MoG(loc=loc, scale=scale, a=a)
samples = R.sample(10000)


# reference samples with np
samples_np = np_MoG(loc,scale,a,n=10000)


# and plot everything
plt.figure(1, figsize=(12,8))
plt.subplot(2,2,1)
sns.distplot(samples[0], bins=np.arange(-10,20,0.5))
sns.distplot(samples_np[:,0], bins=np.arange(-10,20,0.5))
plt.subplot(2,2,4)
sns.distplot(samples[1], bins=np.arange(-10,20,0.5),label='cp samples')
sns.distplot(samples_np[:,1], bins=np.arange(-10,20,0.5), label='np samples')
plt.legend()
plt.subplot(2,2,2)
sns.kdeplot(samples[1], samples[0])
sns.kdeplot(samples_np[:,1], samples_np[:,0])

plt.xlim(-10,20)

image

Metadata

Metadata

Assignees

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions