| Home | Trees | Indices | Help |
|
|---|
|
|
1 """
2 Probability density functions.
3
4 This module defines L{AbstractDensity}: a common interface for all PDFs.
5 Each L{AbstractDensity} describes a specific type of probability distribution,
6 for example L{Normal} is an implementation of the Gaussian distribution:
7
8 >>> pdf = Normal(mu=10, sigma=1.1)
9 >>> pdf.mu, pdf['sigma']
10 10.0, 1.1
11
12 Every PDF provides an implementation of the L{AbstractDensity.evaluate}
13 method, which evaluates the PDF for a list of input data points:
14
15 >>> pdf.evaluate([10, 9, 11, 12])
16 array([ 0.3626748 , 0.2399147 , 0.2399147 , 0.06945048])
17
18 PDF instances also behave like functions:
19
20 >>> pdf(data) # the same as pdf.evaluate(data)
21
22 Some L{AbstractDensity} implementations may support drawing random numbers from
23 the distribution (or raise an exception otherwise):
24
25 >>> pdf.random(2)
26 array([ 9.86257083, 9.73760515])
27
28 Each implementation of L{AbstractDensity} may support infinite number of estimators,
29 used to estimate and re-initialize the PDF parameters from a set of observed data
30 points:
31
32 >>> pdf.estimate([5, 5, 10, 10])
33 >>> pdf.mu, pdf.sigma
34 (7.5, 2.5)
35 >>> pdf.estimator
36 <csb.statistics.pdf.GaussianMLEstimator>
37
38 Estimators implement the L{AbstractEstimator} interface. They are treated as
39 pluggable tools, which can be exchanged through the L{AbstractDensity.estimator}
40 property (you could create, initialize and plug your own estimator as well).
41 This is a classic Strategy pattern.
42 """
43
44 import numpy.random
45 import scipy.special
46 import csb.core
47
48 from abc import ABCMeta, abstractmethod
49 from csb.core import OrderedDict
50
51 from csb.numeric import log, exp, psi, inv_psi, EULER_MASCHERONI
52 from scipy.special import gammaln
53 from numpy import array, fabs, power, sqrt, pi, mean, median, clip
58
61
63
65
66 self.param = param
67 self.value = value
68
69 super(ParameterValueError, self).__init__(param, value)
70
73
76
78 """
79 Density parameter estimation strategy.
80 """
81
82 __metaclass__ = ABCMeta
83
84 @abstractmethod
86 """
87 Estimate the parameters of the distribution from same {data}.
88
89 @param context: context distribution
90 @type context: L{AbstractDensity}
91 @param data: sample values
92 @type data: array
93
94 @return: a new distribution, initialized with the estimated parameters
95 @rtype: L{AbstractDensity}
96
97 @raise EstimationFailureError: if estimation is not possible
98 """
99 pass
100
107
118
129
143
163
165
167
168 self._minbeta = minbeta
169 self._maxbeta = maxbeta
170 self._step = step
171
172 super(GenNormalBruteForceEstimator, self).__init__()
173
175
176 pdf = GeneralizedNormal(1, 1, 1)
177 data = array(data)
178 logl = []
179
180 for beta in numpy.arange(self._minbeta, self._maxbeta, self._step):
181
182 self.update(pdf, data, beta)
183
184 l = pdf.log_prob(data).sum()
185 logl.append([beta, l])
186
187 logl = numpy.array(logl)
188
189 # optimal parameters:
190 beta = logl[ numpy.argmax(logl[:, 1]) ][0]
191 self.update(pdf, data, beta)
192
193 return pdf
194
196
197 mu = median(data)
198 v = mean((data - mu) ** 2)
199 alpha = sqrt(v * exp(gammaln(1. / beta) - gammaln(3. / beta)))
200
201 return mu, alpha
202
212
220
222
227
229
230 log_p = numpy.mean(log(data), 0)
231
232 e = numpy.mean(data, 0)
233 v = numpy.mean(data ** 2, 0)
234 q = (e[0] - v[0]) / (v[0] - e[0] ** 2)
235
236 a = e * q
237 y = a * 0
238 k = 0
239 while(sum(abs(y - a)) > self.tol and k < self.n_iter):
240 y = psi(sum(a)) + log_p
241 a = numpy.array(list(map(inv_psi, y)))
242 k += 1
243
244 return Dirichlet(a)
245
256
267
270 """
271 Defines the interface and common operations for all probability density
272 functions.
273
274 Subclasses must complete the implementation by implementing the
275 L{AbstractDensity.log_prob} method. Subclasses could also consider--but
276 are not obliged to--override the L{AbstractDensity.random} method. If
277 any of the density parameters need validation, subclasses are expected to
278 override the L{AbstractDensity._validate} method and raise
279 L{ParameterValueError} on validation failure. Note that implementing
280 parameter validation in property setters has almost no effect and is
281 discouraged.
282 """
283
284 __metaclass__ = ABCMeta
285
286
288
289 self._params = OrderedDict()
290 self._estimator = None
291
292 self.estimator = NullEstimator()
293
295
296 if param in self._params:
297 return self._params[param]
298 else:
299 raise ParameterNotFoundError(param)
300
302
303 if param in self._params:
304 if csb.core.iterable(value):
305 value = array(value)
306 else:
307 value = float(value)
308
309 self._validate(param, value)
310 self._params[param] = value
311 else:
312 raise ParameterNotFoundError(param)
313
314 @property
317 @estimator.setter
319 if not isinstance(strategy, AbstractEstimator):
320 raise TypeError(strategy)
321 self._estimator = strategy
322
325
327 name = self.__class__.__name__
328 params = ', '.join([ '{0}={1}'.format(p, v) for p, v in self._params.items() ])
329
330 return '{0}({1})'.format(name, params)
331
333 """
334 Register a new parameter name.
335 """
336 if name not in self._params:
337 self._params[name] = None
338
340 """
341 Parameter value validation hook.
342 @raise ParameterValueError: on failed validation (value not accepted)
343 """
344 pass
345
348
350
351 for p, v in zip(self.parameters, values):
352 self[p] = v
353
354 for p in named_params:
355 self[p] = named_params[p]
356
357 @property
363
364 @abstractmethod
366 """
367 Evaluate the logarithm of the probability of observing values C{x}.
368
369 @param x: values
370 @type x: array
371 @rtype: array
372 """
373 pass
374
376 """
377 Evaluate the probability of observing values C{x}.
378
379 @param x: values
380 @type x: array
381 @rtype: array
382 """
383 x = numpy.array(x)
384 return exp(self.log_prob(x))
385
387 """
388 Generate random samples from the probability distribution.
389
390 @param size: number of values to sample
391 @type size: int
392 """
393 raise NotImplementedError()
394
396 """
397 Estimate and load the parameters of the distribution from sample C{data}
398 using the current L{AbstractEstimator} strategy.
399
400 @param data: sample values
401 @type data: array
402
403 @raise NotImplementedError: when no estimator is available for this
404 distribution
405 @raise IncompatibleEstimatorError: when the current estimator is not
406 compatible with this pdf
407 """
408
409 try:
410 pdf = self.estimator.estimate(self, data)
411
412 for param in pdf.parameters:
413 self[param] = pdf[param]
414
415 except ParameterNotFoundError as e:
416 raise IncompatibleEstimatorError(self.estimator)
417
418 except ParameterValueError as e:
419 raise EstimationFailureError(e.param, e.value)
420
422
424
425 super(Laplace, self).__init__()
426
427 self._register('mu')
428 self._register('b')
429
430 self.set_params(b=b, mu=mu)
431 self.estimator = LaplaceMLEstimator()
432
437
438 @property
441 @b.setter
443 self['b'] = value
444
445 @property
448 @mu.setter
450 self['mu'] = value
451
458
465
467
469
470 super(Normal, self).__init__()
471
472 self._register('mu')
473 self._register('sigma')
474
475 self.set_params(mu=mu, sigma=sigma)
476 self.estimator = GaussianMLEstimator()
477
478 @property
481 @mu.setter
483 self['mu'] = value
484
485 @property
488 @sigma.setter
490 self['sigma'] = value
491
493
494 mu = self.mu
495 sigma = self.sigma
496
497 return log(1.0 / sqrt(2 * pi * sigma ** 2)) - (x - mu) ** 2 / (2 * sigma ** 2)
498
505
507
509
510 super(InverseGaussian, self).__init__()
511
512 self._register('mu')
513 self._register('shape')
514
515 self.set_params(mu=mu, shape=shape)
516 self.estimator = InverseGaussianMLEstimator()
517
522
523 @property
526 @mu.setter
528 self['mu'] = value
529
530 @property
533 @shape.setter
535 self['shape'] = value
536
538
539 mu = self.mu
540 scale = self.shape
541 x = numpy.array(x)
542
543 if numpy.min(x) <= 0:
544 raise ValueError('InverseGaussian is defined for x > 0')
545
546 y = -0.5 * scale * (x - mu) ** 2 / (mu ** 2 * x)
547 z = 0.5 * (log(scale) - log(2 * pi * x ** 3))
548 return z + y
549
550
552
553 mu = self.mu
554 shape = self.shape
555
556 mu_2l = mu / shape / 2.
557 Y = numpy.random.standard_normal(size)
558 Y = mu * Y ** 2
559 X = mu + mu_2l * (Y - sqrt(4 * shape * Y + Y ** 2))
560 U = numpy.random.random(size)
561
562 m = numpy.less_equal(U, mu / (mu + X))
563
564 return m * X + (1 - m) * mu ** 2 / X
565
567
569
570 super(GeneralizedNormal, self).__init__()
571
572 self._register('mu')
573 self._register('alpha')
574 self._register('beta')
575
576 self.set_params(mu=mu, alpha=alpha, beta=beta)
577 self.estimator = GenNormalBruteForceEstimator()
578
583
584 @property
587 @mu.setter
589 self['mu'] = value
590
591 @property
594 @alpha.setter
596 self['alpha'] = value
597
598 @property
601 @beta.setter
603 self['beta'] = value
604
612
614
616 super(GeneralizedInverseGaussian, self).__init__()
617
618 self._register('a')
619 self._register('b')
620 self._register('p')
621 self.set_params(a=a, b=b, p=p)
622
623 self.estimator = NullEstimator()
624
629
630 @property
633 @a.setter
635 self['a'] = value
636
637 @property
640 @b.setter
642 self['b'] = value
643
644 @property
647 @p.setter
649 self['p'] = value
650
652
653 a = self['a']
654 b = self['b']
655 p = self['p']
656
657 lz = 0.5 * p * (log(a) - log(b)) - log(2 * scipy.special.kv(p, sqrt(a * b)))
658
659 return lz + (p - 1) * log(x) - 0.5 * (a * x + b / x)
660
662
663 from csb.statistics.rand import inv_gaussian
664
665 rvs = []
666 burnin = 10
667 a = self['a']
668 b = self['b']
669 p = self['p']
670
671 s = a * 0. + 1.
672
673 if p < 0:
674 a, b = b, a
675
676 if size == None:
677 size = 1
678 for i in range(int(size)):
679 for j in range(burnin):
680
681 l = b + 2 * s
682 m = sqrt(l / a)
683
684 x = inv_gaussian(m, l, shape=m.shape)
685 s = numpy.random.gamma(abs(p) + 0.5, x)
686
687 if p >= 0:
688 rvs.append(x)
689 else:
690 rvs.append(1 / x)
691
692 return numpy.array(rvs)
693
695
697 super(Gamma, self).__init__()
698
699 self._register('alpha')
700 self._register('beta')
701
702 self.set_params(alpha=alpha, beta=beta)
703 self.estimator = GammaMLEstimator()
704
709
710 @property
713 @alpha.setter
715 self['alpha'] = value
716
717 @property
720 @beta.setter
722 self['beta'] = value
723
725
726 a, b = self['alpha'], self['beta']
727
728 return a * log(b) - gammaln(clip(a, 1e-308, 1e308)) + \
729 (a - 1) * log(clip(x, 1e-308, 1e308)) - b * x
730
733
735
737 super(InverseGamma, self).__init__()
738
739 self._register('alpha')
740 self._register('beta')
741
742 self.set_params(alpha=alpha, beta=beta)
743 self.estimator = NullEstimator()
744
749
750 @property
753 @alpha.setter
755 self['alpha'] = value
756
757 @property
760 @beta.setter
762 self['beta'] = value
763
765 a, b = self['alpha'], self['beta']
766 return a * log(b) - gammaln(a) - (a + 1) * log(x) - b / x
767
770
772
774
775 super(MultivariateGaussian, self).__init__(mu, sigma)
776 self.estimator = MultivariateGaussianMLEstimator()
777
780
782
783 from numpy.linalg import det
784
785 mu = self.mu
786 S = self.sigma
787 D = len(mu)
788 q = self.__q(x)
789 return -0.5 * (D * log(2 * pi) + log(abs(det(S)))) - 0.5 * q ** 2
790
792 from numpy import sum, dot, reshape
793 from numpy.linalg import inv
794
795 mu = self.mu
796 S = self.sigma
797
798 return sqrt(clip(sum(reshape((x - mu) * dot(x - mu, inv(S).T.squeeze()), (-1, len(mu))), -1), 0., 1e308))
799
801 """
802 Return the distribution along the dimensions
803 dims conditioned on x
804
805 @param x: conditional values
806 @param dims: new dimensions
807 """
808 from numpy import take, dot
809 from numpy.linalg import inv
810
811 dims2 = [i for i in range(self['mu'].shape[0]) if not i in dims]
812
813 mu1 = take(self['mu'], dims)
814 mu2 = take(self['mu'], dims2)
815
816 # x1 = take(x, dims)
817 x2 = take(x, dims2)
818
819 A = take(take(self['Sigma'], dims, 0), dims, 1)
820 B = take(take(self['Sigma'], dims2, 0), dims2, 1)
821 C = take(take(self['Sigma'], dims, 0), dims2, 1)
822
823 mu = mu1 + dot(C, dot(inv(B), x2 - mu2))
824 Sigma = A - dot(C, dot(inv(B), C.T))
825
826 return MultivariateGaussian((mu, Sigma))
827
829
831 super(Dirichlet, self).__init__()
832
833 self._register('alpha')
834
835 self.set_params(alpha=alpha)
836 self.estimator = DirichletEstimator()
837
838 @property
841
842 @alpha.setter
844 self['alpha'] = numpy.ravel(value)
845
847 #TODO check wether x is in the probability simplex
848 alpha = self.alpha
849 return gammaln(sum(alpha)) - sum(gammaln(alpha)) \
850 + numpy.dot((alpha - 1).T, log(x).T)
851
854
857
859 super(GumbelMinimum, self).__init__()
860
861 self._register('mu')
862 self._register('beta')
863
864 self.set_params(mu=mu, beta=beta)
865 self.estimator = GumbelMinMomentsEstimator()
866
871
872 @property
875 @mu.setter
877 self['mu'] = value
878
879 @property
882 @beta.setter
884 self['beta'] = value
885
887
888 mu = self.mu
889 beta = self.beta
890
891 z = (x - mu) / beta
892 return log(1. / beta) + z - exp(z)
893
900
922
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Thu Jun 20 10:55:50 2013 | http://epydoc.sourceforge.net |