mim
diff --git a/‎.gitignore
+1 b/‎.gitignore
+1
diff --git a/‎README.txt
+54 b/‎README.txt
+54
diff --git a/‎argmax.m
+15 b/‎argmax.m
+15
diff --git a/‎ars.m
+127 b/‎ars.m
+127
diff --git a/‎drawGmm.m
+29 b/‎drawGmm.m
+29
diff --git a/‎drawMultinom.m
+15 b/‎drawMultinom.m
+15
diff --git a/‎drawSpiral.m
+19 b/‎drawSpiral.m
+19
diff --git a/‎gibbsGmm.m
+97 b/‎gibbsGmm.m
+97
@@ -0,0 +1 @@
+*~
@@ -0,0 +1,54 @@
+Michael Mandel
+CS 4771 Final Project
+The Infinite Gaussian Mixture Model
+Prof. Tony Jebara
+May 5, 2005
+
+To generate the test data used in the paper, make this call in
+MATLAB:
+[Y,z] = drawGmm([-3 3], [1 10], [1 2], 500);
+
+In order to run the infinite GMM on the data for 10000 iterations,
+make this call:
+Samp = igmm_uv(Y, 10000);
+
+It's as easy as that.
+
+If you want to run the regular univariate Gibbs Sampler on the data,
+do this: 
+[mu,sigSq,p,z,churn] = gibbsGmm(Y,2,0,100,2,1,2,1000);
+
+The igmm for multivariate data is in igmm_mv.m, which uses
+logmvbetpdf.m instead of the logbetapdf.m used by igmm_uv.m.
+Otherwise, both igmms are self-contained.
+
+To generate multivariate data, use e.g.
+S = [2 1; 1 2]; S(:,:,2) = S;
+[Y,z] = drawGmm([3 -3; -3 3], S, [1 1], 100);
+Samp = igmm_mv(Y, 10000);
+
+To generate figure 3 in the paper, use the function plotAutoCov.m
+
+
+
+=====================================================
+COPYRIGHT / LICENSE
+=====================================================
+All code was written by Michael Mandel, and is licensed under the
+GNU Lesser General Public License (LGPL):
+  Copyright (C) 2005  Michael Mandel
+ 
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public License
+as published by the Free Software Foundation; version 2.1 or later.
+ 
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Lesser General Public License for more details.
+ 
+You should have received a copy of the GNU Lesser General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ 
+The authors may be contacted via email at: mim at ee columbia edu
@@ -0,0 +1,15 @@
+function am = argmax(X, dim)
+
+% am = argmax(X, dim)
+%
+% Return the index of the maximum value of X, i.e. the second output
+% of MAX.  Without dim, MAX's default dimension is used, so a vector
+% yields a single index and a matrix yields one row index per column.
+% If dim is supplied, the index is taken along dimension dim instead.
+
+% Copyright (C) 2005 Michael Mandel, mim at ee columbia edu;
+% distributable under GPL
+
+haveDim = (nargin >= 2);
+if haveDim
+  [maxVal, am] = max(X, [], dim);
+else
+  [maxVal, am] = max(X);
+end
@@ -0,0 +1,127 @@
+function samples = ars(logpdf, pdfargs, N, xi, support)
+
+% samples = ars(logpdf, pdfargs, N, xi, support)
+%
+% Perform adaptive rejection sampling as described in Gilks & Wild
+% '92 and Wild & Gilks '93.  The PDF must be log-concave.  Draw N
+% samples from the pdf whose log is passed in as a function handle.
+% The log may be offset by an additive constant, corresponding to an
+% unnormalized distribution.
+%
+% Arguments:
+%   logpdf   function with prototype
+%            [h, hprime] = logpdf(x, pdfargs{:}), where x may be a
+%            vector of points, h is the value of the log pdf and
+%            hprime is its derivative at those points
+%   pdfargs  cell array of extra arguments forwarded to logpdf
+%   N        number of samples to draw
+%   xi       row vector of points at which to initially evaluate the
+%            pdf; they must lie on either side of the distribution's
+%            mode
+%   support  2-vector specifying the support of the pdf, defaults to
+%            [-inf inf]
+%
+% Returns samples, a 1xN row vector of draws ([] when N is 0).
+%
+% Errors:
+%   'h not finite...'  if the log pdf is not finite at any of the
+%                      starting points
+%   'Starting points ... do not enclose the mode'  if the derivative
+%                      at the first point is negative or at the last
+%                      point is positive (after the support(1) == 0
+%                      adjustment below)
+%
+% This function does not use the lower squeezing bound because it
+% is optimized for generating a small number of samples each call.
+
+% Copyright (C) 2005 Michael Mandel, mim at ee columbia edu;
+% distributable under GPL, see README.txt
+
+samples = [];
+
+% Don't need to approximate the curve too well, all the sorting and
+% whatnot gets expensive
+Nxmax = 50;
+
+if(nargin < 5) support = [-inf inf]; end
+
+x = sort(xi);
+[h, hprime] = feval(logpdf, x, pdfargs{:});
+
+% Every starting value must be finite: a single inf/NaN in h poisons
+% the tangent intersections and the envelope areas computed by insert.
+% Report the offending values in the error itself instead of echoing
+% variables to the console (the old dump also called det(pdfargs{2}),
+% which raised an unrelated error for most argument lists).
+if(any(~isfinite(h(:))))
+  error(['h not finite: logpdf returned [' num2str(h(:)') ...
+         '] at starting points [' num2str(x(:)') ']']);
+end
+
+if(support(1) == 0)
+  % Cheat!  Get closer and closer to 0 as needed: halve the smallest
+  % point until the slope there is non-negative...
+  while(hprime(1) < 0)
+    xt = x(1)/2;
+    [ht,hpt] = feval(logpdf, xt, pdfargs{:});
+    [x,z,h,hprime,hu,sc,cu] = insert(x,xt,h,ht,hprime,hpt,support);
+  end
+
+  % ...and double the largest point until the slope there is
+  % non-positive, so that the points bracket the mode.
+  while(hprime(end) > 0)
+    xt = x(end)*2;
+    [ht,hpt] = feval(logpdf, xt, pdfargs{:});
+    [x,z,h,hprime,hu,sc,cu] = insert(x,xt,h,ht,hprime,hpt,support);
+  end
+end
+
+if(hprime(1) < 0 || hprime(end) > 0)
+  % If the lower bound isn't 0, can't help it (for now)
+  error(['Starting points ' num2str(x) ' do not enclose the' ...
+         ' mode']);
+end
+
+
+% Avoid under/overflow errors. the envelope and pdf are only
+% proportional to the true pdf, so we can choose any constant
+% of proportionality.
+offset = max(h);
+h = h-offset;
+
+% Rebuild the envelope (z, hu, sc, cu) from the offset values without
+% adding a new point.
+[x,z,h,hprime,hu,sc,cu] = insert(x,[], h,[], hprime,[], support);
+
+Nsamp = 0;
+while Nsamp < N
+  % Draw 2 random numbers in [0,1]: u(1) picks the candidate point,
+  % u(2) drives the accept/reject decision
+  u = rand(1,2);
+
+  % Find the largest z such that sc(z) < u
+  idx = find(sc/cu < u(1));
+  idx = idx(end);
+
+  % Figure out the x in that segment that u corresponds to, by
+  % inverting the cumulative area of the exponentiated tangent line
+  % through x(idx)
+  xt = x(idx) + (-h(idx) + log(hprime(idx)*(cu*u(1) - sc(idx)) + ...
+      exp(hu(idx)))) / hprime(idx);
+  [ht,hpt] = feval(logpdf, xt, pdfargs{:});
+  ht = ht-offset;
+
+  % Figure out what h_u(xt) is a dumb way, uses assumption that the
+  % log pdf is concave (the upper hull is the minimum over all
+  % tangent lines)
+  hut = min(hprime.*(xt - x) + h);
+
+  % Decide whether to keep the sample
+  if(u(2) < exp(ht - hut))
+    Nsamp = Nsamp+1;
+    samples(Nsamp) = xt;
+  end
+
+  % Update vectors if necessary: every candidate, accepted or not,
+  % refines the envelope until Nxmax points are in use
+  if(length(x) < Nxmax)
+    [x,z,h,hprime,hu,sc,cu] = insert(x,xt,h,ht,hprime,hpt,support);
+  end
+end
+
+
+
+function [x, z, h, hprime, hu, sc, cu] = ...
+    insert(x, xnew, h, hnew, hprime, hprimenew, support)
+% Merge the point xnew (with log-pdf value hnew and slope hprimenew)
+% into the sorted abscissae x and recompute the piecewise-linear
+% upper hull.  Pass [] for the three "new" arguments to rebuild the
+% hull from the existing points only.
+%
+% Outputs:
+%   x, h, hprime  the inputs with the new point merged in, sorted by x
+%   z             hull break points: support(1), the intersections of
+%                 the tangent lines at adjacent x, then support(end)
+%   hu            value of the hull at each z
+%   sc            cumulative area under exp(hull) up to each z
+%   cu            total area under exp(hull), i.e. sc(end)
+
+% Append the new point, then put all three vectors in x order
+[x, perm] = sort([x xnew]);
+h = [h hnew];
+h = h(perm);
+hprime = [hprime hprimenew];
+hprime = hprime(perm);
+
+% Where the tangents at consecutive points cross
+dx = diff(x);
+dh = diff(h);
+dslope = diff(hprime);
+crossings = x(1:end-1) + (-dh + hprime(2:end).*dx) ./ dslope;
+z = [support(1) crossings support(end)];
+
+% Hull height at each break point: z(1) is evaluated on the tangent
+% through x(1), every later z(j) on the tangent through x(j-1)
+slopeAtZ = [hprime(1) hprime];
+anchorX = [x(1) x];
+anchorH = [h(1) h];
+hu = slopeAtZ .* (z - anchorX) + anchorH;
+
+% $$$ plot(z, hu);
+
+% Area of each exponentiated linear segment, accumulated left to right
+segArea = diff(exp(hu)) ./ hprime;
+sc = [0 cumsum(segArea)];
+cu = sc(end);
+
@@ -0,0 +1,29 @@
+function [Y,z] = drawGmm(mu, sigSq, p, N)
+
+% [Y,z] = drawGmm(mu, sigSq, p, N)
+%
+% Draw N samples from a mixture of length(p) Gaussians whose mixing
+% weights are p.  z(n) is the index of the Gaussian that generated the
+% nth sample.
+%
+% Univariate case (mu has a single row or a single column): mu(i) is
+% the mean and sigSq(i) is the variance of the ith Gaussian, and Y is
+% 1xN.
+%
+% Multivariate case: mu is a matrix where each row is the mean of one
+% Gaussian and sigSq is a 3D matrix such that sigSq(:,:,i) is the
+% covariance of the ith Gaussian.  Y is NxD, one sample per row.
+
+% Copyright (C) 2005 Michael Mandel, mim at ee columbia edu;
+% distributable under GPL, see README.txt
+
+% Y ~ sum[ p_i * N(mu_i, sigma_i) ]
+
+[nMeans, nCols] = size(mu);
+% A mu with one row or one column is always read as a list of scalar
+% means
+isUnivariate = (nMeans == 1) || (nCols == 1);
+
+% Component label for every sample: one column of weights per draw
+z = drawMultinom(repmat(p(:), 1, N));
+
+if isUnivariate
+  Y = randn(1,N).*sqrt(sigSq(z)) + mu(z);
+  return
+end
+
+D = nCols;
+for k=1:length(p)
+  members = find(z == k);
+  nk = numel(members);
+
+  % Square-root factor A of the covariance, built from its SVD, so
+  % that rows of randn(nk,D)*A have covariance sigSq(:,:,k)
+  [U,S,V] = svd(sigSq(:,:,k));
+  A = sqrt(S)*V';
+  Y(members,:) = randn(nk,D) * A + repmat(mu(k,:), nk, 1);
+end
@@ -0,0 +1,15 @@
+function x = drawMultinom(p)
+
+% x = drawMultinom(p)
+%
+% Draw size(p,2) samples from multinomial distributions over the
+% elements [1..size(p,1)].  Column n of p holds the probabilities used
+% for the nth sample; they need not be normalized, since each draw is
+% scaled by its own column total.  x is a 1 x size(p,2) row vector of
+% the drawn element indices.
+%
+% There should be a way to do it without the repmats...
+
+% Copyright (C) 2005 Michael Mandel, mim at ee columbia edu;
+% distributable under GPL, see README.txt
+
+
+% Every reduction is taken explicitly along dimension 1.  With the
+% default dimension a single-row p (only one element to choose from)
+% would be accumulated along its columns and collapse to one scalar.
+cdf = cumsum(p, 1);
+
+% One uniform draw per column, scaled to that column's total mass
+u = repmat(rand(1,size(cdf,2)).*cdf(end,:), size(cdf,1), 1);
+
+% Rows whose cumulative mass exceeds the draw get weight pmax-cdf,
+% which is largest for the first such row, so the column-wise argmax
+% selects the first element whose cumulative mass exceeds u.
+pmax = max(cdf(:))+1;
+m = (u < cdf) .* (pmax-cdf);
+[dummy, x] = max(m, [], 1);
@@ -0,0 +1,19 @@
+function Y = drawSpiral(N, std)
+
+% Y = drawSpiral(N, std)
+%
+% Draw N points from a noisy 3D spiral similar to the one used in
+% Rasmussen's paper, which was taken from Ueda et al (1998).  Y is
+% Nx3 with one point per row.  std is the standard deviation of the
+% Gaussian noise added to every coordinate (note that inside this
+% function the name hides the built-in std).  Defaults are N = 800 and
+% std = .05, which should give something like Ueda's spiral.
+
+% Copyright (C) 2005 Michael Mandel, mim at ee columbia edu;
+% distributable under GPL, see README.txt
+
+
+if(nargin < 1), N = 800; end
+if(nargin < 2), std = .05; end
+
+% Spiral parameter, uniform on [2*pi, 6*pi]
+t = rand(1,N)*4*pi + 2*pi;
+
+% Radius shrinks as 10/t while the height grows linearly with t
+xs = 10*cos(t)./t;
+ys = -10*sin(t)./t;
+zs = t/(4*pi);
+
+Y = [xs; ys; zs]';
+Y = Y + randn(size(Y))*std;
@@ -0,0 +1,97 @@
+function [mu, sigmaSq, p, z, churn] = ...
+    gibbsGmm(Y, k, m, etaSq, nu0, nu0lambda0, alpha, Nsamp)
+
+% [mu, sigmaSq, p, z, churn] = ...
+%     gibbsGmm(Y, k, m, etaSq, nu0, nu0lambda0, alpha, Nsamp)
+%
+% Use Markov chain Monte Carlo simulation to cluster the data Y into a
+% mixture of k univariate Gaussians.
+%
+% Arguments:
+%   Y           vector of N univariate observations
+%   k           number of mixture components
+%   m, etaSq    mean and variance of the Gaussian prior on each mean,
+%               mu ~ N(m, etaSq)
+%   nu0         prior degrees of freedom of each variance
+%   nu0lambda0  prior scale of each variance (presumably nu0*lambda0,
+%               going by the name); each variance is drawn as
+%               (nu0lambda0 + sum of squared deviations) / chi2(nu0+n)
+%   alpha       the class prior is dirichlet(alpha/k)
+%   Nsamp       number of samples (rows of the outputs) to produce
+%
+% Returns:
+%   mu, sigmaSq Nsamp x k; row i is the ith sample of the component
+%               means and variances (row 1 is the random
+%               initialization)
+%   p           Nsamp x k; row i holds the number of points in each
+%               class at the start of iteration i
+%   z           1xN final class assignment of every point
+%   churn       1xNsamp; churn(i) is the number of points whose class
+%               changed in iteration i (churn(1) is 0)
+%
+% Requires unidrnd and chi2rnd (Statistics Toolbox) and drawMultinom.
+
+N = length(Y);
+
+% Preallocate the outputs so they do not grow inside the loop and so
+% that churn is defined even when Nsamp is 1.
+mu = zeros(Nsamp, k);
+sigmaSq = zeros(Nsamp, k);
+p = zeros(Nsamp, k);
+churn = zeros(1, Nsamp);
+
+% Randomly assign to classes, initialize stats to the means and
+% vars of those classes.
+z = drawMultinom(ones(k,N));
+for j=1:k
+  yj = Y(z == j);
+  % The initial mean is a randomly chosen member of the class
+  mu(1,j) = yj(unidrnd(numel(yj)));
+  sigmaSq(1,j) = std(yj).^2;
+end
+p(1,:) = full(sparse(1, z, 1, 1, k));
+
+% NOTE(review): the sigma and z updates below condition on row i-1 of
+% mu and sigmaSq rather than on the values just drawn for row i.  This
+% is kept as in the original; confirm whether it is intentional.
+
+% Go!
+for i=2:Nsamp
+  % Mu: Gaussian posterior given the class members and the previous
+  % variance
+  for j=1:k
+    inClass = (z == j);
+    n = sum(inClass);
+    if(n <= 0) ybar = 0;
+    else       ybar = mean(Y(inClass));
+    end
+
+    tmp_sigSq = 1/(n/sigmaSq(i-1,j) + 1/etaSq);
+    tmp_mu = tmp_sigSq*(n*ybar/sigmaSq(i-1,j) + m/etaSq);
+    mu(i,j) = drawNormal(tmp_mu, tmp_sigSq);
+  end
+
+  % Sigma: scaled inverse chi-square given the squared deviations of
+  % the class members
+  for j=1:k
+    inClass = (z == j);
+    n = sum(inClass);
+    if(n <= 0) sigbar = 0;
+    else       sigbar = sum((Y(inClass) - mu(i-1,j)).^2);
+    end
+
+    tmp_nu = nu0+n;
+    tmp_nu_lambda = (nu0lambda0 + sigbar);
+    sigmaSq(i,j) = drawInvChiSq(tmp_nu, tmp_nu_lambda);
+  end
+
+  % z \in {1..k}: likelihood of every point under every class
+  tmp_pr = zeros(k, N);
+  for j=1:k
+    tmp_pr(j,:) = normalLike(Y, mu(i-1,j), sigmaSq(i-1,j));
+  end
+
+  % Class occupancy counts, always 1 x k.  tabulate(z) stops at
+  % max(z), so it returned too few entries whenever the
+  % highest-numbered class was empty, breaking the products below.
+  n = full(sparse(1, z, 1, 1, k));
+
+  % Scale likelihoods by class memberships times prior; each point is
+  % removed from the count of its own current class
+  pri = repmat((n'+alpha/k)/(sum(n)-1+alpha), 1, N);
+  idxs = sub2ind(size(pri), z, [1:N]);
+  pri(idxs) = pri(idxs) - 1/(sum(n)-1+alpha);
+
+  tz = drawMultinom(pri .* tmp_pr);
+  churn(i) = sum(tz ~= z);
+  z = tz;
+  p(i,:) = n;
+
+% $$$   plotGmm(mu(i,1), mu(i,2), sigmaSq(i,1), sigmaSq(i,2), p(i));
+% $$$   pause(.1)
+end
+
+
+function x = drawNormal(mu, sigSq)
+% Return a single draw from the Gaussian with mean mu and variance
+% sigSq (a variance, not a standard deviation)
+sd = sqrt(sigSq);
+x = mu + sd*randn(1);
+
+
+function pr = normalLike(y, mu, sigSq)
+% Density of each point in y under the Gaussian with mean mu and
+% variance sigSq.  pr has the same size as y.
+normConst = 1/sqrt(2*pi*sigSq);
+sqDist = (y-mu).^2;
+pr = normConst .* exp(-sqDist/(2*sigSq));
+
+
+function x = drawInvChiSq(nu, nu_lambda)
+% Return one draw from a scaled inverse chi square distribution:
+% nu_lambda divided by a chi square variate with nu degrees of
+% freedom (drawn with chi2rnd)
+chiSqDraw = chi2rnd(nu);
+x = nu_lambda / chiSqDraw;
+
+
+function x = drawBeta(a, b)
+% Return one draw from the Beta(a, b) distribution, using betarnd
+betaDraw = betarnd(a, b);
+x = betaDraw;
+
+
+function x = drawBernoulli(p)
+% Draw one Bernoulli variable per entry of p; entry i is 1 (true)
+% with probability p(i).  x is a logical array the same size as p.
+u = rand(size(p));
+x = u < p;