-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy patheabeta.m
87 lines (73 loc) · 2.88 KB
/
eabeta.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
% [b,W] = eabeta(d2,b0,logK,B) Gaussian EAs: beta and affinities
%
% Computes the values of beta and the corresponding Gaussian affinities for
% one point. It does root finding with Newton's method embedded in a bisection
% loop to ensure global convergence.
%
% In:
% d2: 1 x k vector of square distances to the k nearest neighbors.
% b0: initial value of beta.
% logK: log of the perplexity K.
% B: 1x2 vector of lower and upper bounds on log(beta).
% Out:
% b: log(beta) value.
% W: 1 x k vector of EAs computed with this beta.
% Copyright (c) 2013 by Max Vladymyrov and Miguel A. Carreira-Perpinan
function [b,W] = eabeta(d2,b0,logK,B)
% No need to change these ever, probably:
maxit = 20; % Max. no. iterations before a bisection
tol = 1e-10; % Convergence tolerance to stop iterating
if (b0<B(1) || b0>B(2)) b=(B(1)+B(2))/2; else b=b0; end
i = 1; % maxit counter
% Inside the loop, pbm is a flag to detect any numerical problems (zero
% gradient, infinite function value, etc.).
while 1
bE = exp(b); pbm = 0;
% Compute the function value: m0, m1v, m1 are O(N)
ed2 = exp(-d2*bE); m0 = sum(ed2);
if m0<realmin % Numerical error
e = -logK; pbm = 1;
else
m1v = ed2.*d2/m0; m1 = sum(m1v); e = bE*m1 + log(m0) - logK;
end
if abs(e) < tol break; end
% Very narrow bounds, no need to iterate. This can happen if K is very small.
if B(2)-B(1) < 10*eps break; end
% Update the bounds
if (e<0 && b<=B(2))
B(2) = b;
elseif (e>0 && b>=B(1))
B(1) = b;
end
% on the right term, the first = 0 means the denominator of pn is 0;
% the second = 0 means e is an infinite value, which means at least one
% probability is 0
% the third = 0 means the entropy is smaller than 0, which cannot hold.
% the forth = 0 means the entropy of a distribution on k poins is larger
% than the value of log(k), which cannot hold neither. ---AN
pbm = pbm || isinf(e) || e<-logK || e>log(length(d2))-logK;
if ~pbm
if i==maxit % Exceeded maxit, bisection step
b = (B(1)+B(2))/2; i=1; continue;
end
% Compute the gradient: m2 is O(N)
eg2 = bE^2; m2 = m1v*d2'; m12 = m1^2-m2; g = eg2*m12
if g==0 pbm=1; end
end
% If there was a problem with the function, do bisection with old bounds.
% If there was a problem with the gradient, do bisection with new bounds.
if pbm
% If there is a numerical problem on both ends of the bounds, return
% current value. Practically this happens only when K is very small.
esqd1 = exp(-d2*exp(B(1))); esqd2 = exp(-d2*exp(B(2)));
if sum(esqd1+esqd2) < 2*sqrt(realmin) break; end
b = (B(1)+B(2))/2; i=1; continue;
end
% Newton step ok, update bounds
p = -e/g; b = b + p;
if (b<B(1) || b>B(2)) % Out of bounds, bisection step
b = (B(1)+B(2))/2; i = 0;
end
i=i+1;
end
W = ed2/m0; % Affinities