bipartite_matching.m

function [val m1 m2 mi]=bipartite_matching(varargin)
% BIPARTITE_MATCHING Solve a maximum weight bipartite matching problem
%
% [val m1 m2]=bipartite_matching(A) for a rectangular matrix A 
% [val m1 m2 mi]=bipartite_matching(x,ei,ej,n,m) for a matrix stored
% in triplet format.  This call also returns a matching indicator mi so
% that val = x'*mi.
%
% Outputs:
%   val - total weight of the matching
%   m1  - row indices of the matched pairs
%   m2  - column indices of the matched pairs (m1(k) is matched to m2(k))
%   mi  - (triplet input only) logical indicator over the input triplets,
%         true for the edges that belong to the matching
%
% The maximum weight bipartite matching problem tries to pick out elements
% from A such that each row and column get only a single non-zero but the
% sum of all the chosen elements is as large as possible.
%
% This function is slightly atypical for a graph library, because it will
% be primarily used on rectangular inputs.  However, these rectangular
% inputs model bipartite graphs and we take advantage of that structure in
% this code.  The underlying graph adjacency matrix is 
%   G = spaugment(A,0); 
% where A is the rectangular input to the bipartite_matching function.
%
% Matlab already has the dmperm function that computes a maximum
% cardinality matching between the rows and the columns.  This function
% gives us the maximum weight matching instead.  For unweighted graphs, the
% two functions are equivalent.
%
% Note: Duplicate edges in ei and ej are not supported.  The input
% conversion detects them and raises the error
% bipartite_matching:duplicateEdge.
%
% See also DMPERM
%
% Example:
%   A = rand(10,8); % bipartite matching between random data
%   [val m1 m2] = bipartite_matching(A);
%   val
%   % the same kind of problem in triplet form, with the edge indicator
%   [val m1 m2 mi] = bipartite_matching([5;7],[1;2],[2;1],2,2);

% David F. Gleich and Ying Wang
% Copyright, Stanford University, 2008-2009
% Computational Approaches to Digital Stewardship

% 2008-04-24: Initial coding (copy from Ying Wang matching_sparse_mex.cpp)
% 2008-11-15: Added triplet input/output
% 2009-04-30: Modified for gaimc library
% 2009-05-15: Fixed error with empty inputs and triple added example.

% The setup routine validates the number of inputs and builds the
% compressed row arrays (with one extra slack edge per row).
[rp ci ai tripi n m] = bipartite_matching_setup(varargin{:});

% Decide between matrix and triplet mode from the calling form.  Testing
% isempty(tripi) is not reliable: tripi is also empty for a triplet call
% with no rows and no edges, which would wrongly reject a fourth output.
triplet = nargin > 1;

if triplet
    error(nargoutchk(0,4,nargout,'struct'));
else
    error(nargoutchk(0,3,nargout,'struct'));
end

% Only ask the solver for the edge indicator when the caller wants it.
if triplet && nargout>3
    [val m1 m2 mi] = bipartite_matching_primal_dual(rp, ci, ai, tripi, n, m);
else
    [val m1 m2] = bipartite_matching_primal_dual(rp, ci, ai, tripi, n, m);
end

function [rp ci ai tripi n m]= bipartite_matching_setup(A,ei,ej,n,m)
% BIPARTITE_MATCHING_SETUP Convert the user input into augmented CSR arrays
%
% [rp ci ai tripi n m] = bipartite_matching_setup(A) takes a matrix A, or a
% struct with fields rp, ci, ai holding compressed sparse row arrays.
% [rp ci ai tripi n m] = bipartite_matching_setup(x,ei,ej,n,m) takes the
% edges in triplet form: weights x, row indices ei, column indices ej.
% n and m are optional and default to max(ei) and max(ej).
%
% Outputs:
%   rp    - (n+1)-by-1 one-based row pointers; row i owns entries
%           rp(i):rp(i+1)-1 of ci, ai and tripi
%   ci    - column index of each stored edge; the last entry of row i is
%           an extra zero-weight edge to the slack column m+i
%   ai    - weight of each stored edge
%   tripi - triplet input: position of each stored edge in the input
%           triplets, -1 for the slack edges; [] for matrix/struct input
%   n, m  - number of rows and columns of the problem
%
% Errors:
%   bipartite_matching:invalidInput  - triplet arrays differ in length
%   bipartite_matching:invalidIndex  - an index lies outside 1..n / 1..m
%   bipartite_matching:duplicateEdge - the same (row,column) appears twice

if nargin == 1
    if isstruct(A)
        [nzi nzj nzv]=csr_to_sparse(A.rp,A.ci,A.ai);
        % size() of a struct is 1-by-1, so the dimensions have to come
        % from the CSR arrays themselves.
        n = max(length(A.rp)-1,0);
        if isempty(A.ci), m = 0; else m = max(A.ci); end
    else
        [nzi nzj nzv]=find(A); 
        [n m]=size(A);
    end
    triplet = 0;
elseif nargin >= 3 && nargin <= 5    
    nzi = ei;
    nzj = ej;
    nzv = A;
    if numel(nzi) ~= numel(nzj) || numel(nzi) ~= numel(nzv)
        error('bipartite_matching:invalidInput',...
            'x, ei, and ej must have the same number of elements');
    end
    if ~exist('n','var') || isempty(n), n = max(nzi); end
    if ~exist('m','var') || isempty(m), m = max(nzj); end
    % max of an empty index list is [], which cannot size an array
    if isempty(n), n = 0; end
    if isempty(m), m = 0; end
    triplet = 1;
else    
    error(nargchk(3,5,nargin,'struct'));
end
nedges = length(nzi);

% Reject indices outside the declared size.  A column index larger than m
% would otherwise alias one of the slack columns m+1..m+n and silently
% corrupt the matching; a row index larger than n would grow rp.
if any(nzi(:) < 1 | nzi(:) > n)
    error('bipartite_matching:invalidIndex',...
        'row indices must lie between 1 and %i',n);
end
if any(nzj(:) < 1 | nzj(:) > m)
    error('bipartite_matching:invalidIndex',...
        'column indices must lie between 1 and %i',m);
end

rp = ones(n+1,1); % csr matrix with extra edges
ci = zeros(nedges+n,1);
ai = zeros(nedges+n,1);
if triplet, tripi = zeros(nedges+n,1); % triplet index
else tripi = [];
end

%
% 1. build csr representation with a set of extra edges from vertex i to
% vertex m+i
%
% rp starts as ones so that every row already counts its slack edge; the
% loop below adds the real edges of each row to that count.
rp(1)=0;
for i=1:nedges
    rp(nzi(i)+1)=rp(nzi(i)+1)+1;
end
rp=cumsum(rp); % rp(i) is now the zero-based start of row i
for i=1:nedges
    % rp(row) doubles as the insertion cursor for that row
    if triplet, tripi(rp(nzi(i))+1)=i; end % triplet index
    ai(rp(nzi(i))+1)=nzv(i);
    ci(rp(nzi(i))+1)=nzj(i);
    rp(nzi(i))=rp(nzi(i))+1;
end
for i=1:n % add the extra edges
    if triplet, tripi(rp(i)+1)=-1; end % triplet index
    ai(rp(i)+1)=0;
    ci(rp(i)+1)=m+i;
    rp(i)=rp(i)+1;
end
% restore the row pointer array: after the fills rp(i) holds the end of
% row i, so shift everything down by one slot
for i=n:-1:1
    rp(i+1)=rp(i);
end
rp(1)=0;
rp=rp+1; % switch to one-based pointers

%
% 1a. check for duplicates in the data
%
colind = false(m+n,1); % marks the columns seen in the current row
for i=1:n
    for rpi=rp(i):rp(i+1)-1
        if colind(ci(rpi)), error('bipartite_matching:duplicateEdge',...
            'duplicate edge detected (%i,%i)',i,ci(rpi)); 
        end
        colind(ci(rpi))=1;
    end
    for rpi=rp(i):rp(i+1)-1, colind(ci(rpi))=0; end % reset indicator
end


function [val m1 m2 mi]=bipartite_matching_primal_dual(...
                            rp, ci, ai, tripi, n, m)
% BIPARTITE_MATCHING_PRIMAL_DUAL Primal-dual solver on compressed row data
%
% rp, ci, ai describe the edges of each of the n rows: row r owns entries
% rp(r):rp(r+1)-1, with column indices ci and weights ai.  Column indices
% run over 1..n+m; a row matched to a column index above m is reported as
% unmatched.  tripi maps each stored edge back to an input triplet and is
% only read when the fourth output mi is requested.
%
% Returns the matching weight val, the matched row/column pairs m1/m2 and,
% on request, the logical edge indicator mi over the input triplets.

ncols    = n+m;
rowdual  = zeros(n,1);      % dual variable of each row
coldual  = zeros(ncols,1);  % dual variable of each column
frontier = zeros(n,1);      % rows waiting to be scanned in the search
pred     = zeros(ncols,1);  % row from which a column was reached (0: not reached)
match1   = zeros(n,1);      % column matched to each row (0: none)
match2   = zeros(ncols,1);  % row matched to each column (0: none)
touched  = zeros(ncols,1);  % columns labelled during the current search
ntouched = 0;

% Start every row dual at the largest weight in the row (never below the
% initial 0); column duals start at 0 and nothing is matched.
for r=1:n
    for e=rp(r):rp(r+1)-1
        if ai(e) > rowdual(r), rowdual(r)=ai(e); end
    end
end

% One stage per row: search, adjust the duals, and search again until the
% stage's root row is matched.
for root=1:n
    while true
        % forget the labels left over from the previous search
        for q=1:ntouched, pred(touched(q))=0; end
        ntouched=0;

        % breadth-first search from the root
        head=1; tail=1;
        frontier(head)=root;
        while head <= tail && match1(root)==0
            k=frontier(head);
            for e=rp(k):rp(k+1)-1
                j=ci(e);
                % only edges that are tight (within 1e-8) may be followed
                if ai(e) < rowdual(k)+coldual(j) - 1e-8, continue; end
                % column already reached in this search
                if pred(j)~=0, continue; end

                tail=tail+1;
                frontier(tail)=match2(j);
                pred(j)=k;
                ntouched=ntouched+1;
                touched(ntouched)=j;

                % a matched column only extends the search
                if match2(j)>=1, continue; end

                % free column: flip the matching along the path to the root
                while j>0
                    match2(j)=pred(j);
                    k=pred(j);
                    prev=match1(k);
                    match1(k)=j;
                    j=prev;
                end
                break; % alternating path found
            end
            head=head+1;
        end

        if match1(root)>=1, break; end % root matched, stage finished

        % No path yet: theta is the smallest slack on an edge from a
        % scanned row to a column that was not reached.
        theta=inf;
        for q=1:head-1
            r=frontier(q);
            for e=rp(r):rp(r+1)-1
                c=ci(e);
                if pred(c)==0
                    slack=rowdual(r) + coldual(c) - ai(e);
                    if slack < theta, theta=slack; end
                end
            end
        end

        % lower the scanned rows and raise the reached columns by theta
        for q=1:head-1
            rowdual(frontier(q))=rowdual(frontier(q)) - theta;
        end
        for q=1:ntouched
            coldual(touched(q))=coldual(touched(q)) + theta;
        end
    end
end

% total weight of the matched edges
val=0;
for r=1:n
    for e=rp(r):rp(r+1)-1
        if ci(e)==match1(r), val=val+ai(e); end
    end
end

% list the rows matched to one of the first m columns
nmatched=nnz(match1<=m);
m1=zeros(nmatched,1);
m2=zeros(nmatched,1);
slot=0;
for r=1:n
    if match1(r)<=m
        slot=slot+1;
        m1(slot)=r;
        m2(slot)=match1(r);
    end
end

% indicator over the input triplets, built only when it is requested
if nargout>3
    mi=false(length(tripi)-n,1);
    for r=1:n
        if match1(r)>m, continue; end
        for e=rp(r):rp(r+1)-1
            if ci(e)==match1(r), mi(tripi(e))=true; end
        end
    end
end