Commit 6186ecac by Hafiz Imtiaz

updated readme file

parent 375f3e69
# NMF
NMF is a MATLAB code collection for several non-negative matrix factorization algorithms
NMF is a MATLAB code collection for several non-negative matrix factorization algorithms. Currently it contains 8 functions:
* nmf_myalg - function for Proposed Algo1: NMF with outliers using alternating least squares approach
* nmf_myneogm - function for Proposed Algo2: NMF with outliers using Nesterov's optimal gradient approach
* nmf_mydistalg - function for Proposed distributed Algo1: distributed NMF with outliers using alternating least squares approach
* nmf_mydistneogm - function for Proposed distributed Algo2: distributed NMF with outliers using Nesterov's optimal gradient approach
* nmf_alspgd - function for NMF algorithm due to Chih-Jen Lin. 2007. Projected Gradient Methods for Nonnegative Matrix Factorization. Neural Comput. 19, 10 (October 2007), 2756-2779
* nmf_neogm - function for NMF algorithm due to N. Guan, D. Tao, Z. Luo, and B. Yuan. NeNMF: An Optimal Gradient Method for Non-negative Matrix Factorization. IEEE Transactions on Signal Processing, Vol. 60, No. 6, PP. 2882-2898, Jun. 2012.
* nmf_opgd - function for online NMF algorithm due to Zhao, Renbo; Tan, Vincent Y. F., 2016. Online Nonnegative Matrix Factorization With Outliers. IEEE Transactions on Signal Processing, vol. 65, issue 3, pp. 555-570
* nmf_bpgd - function for batch NMF algorithm due to Zhao, Renbo; Tan, Vincent Y. F., 2016. Online Nonnegative Matrix Factorization With Outliers. IEEE Transactions on Signal Processing, vol. 65, issue 3, pp. 555-570
## Contact
......
function [W, dist_H, dist_R, iter, elapse, HIS] = nmf_mydistalg(dist_V, Winit, Hinit, Rinit, LAMBDA, TOL, TIMELIMIT, MAXITER)
% Distributed non-negative matrix factorization with outliers: ALS-PGD approach.
% Alternates projected-gradient updates of H and R at each local site with an
% aggregated update of the shared W at the central site.
% Outputs:
%   W      - estimated global W
%   dist_H - cell array containing the estimated H at each site
%   dist_R - cell array containing the estimated outlier matrix R at each site
%   iter   - number of top-level iterations (communication rounds)
%   elapse - total elapsed CPU time
%   HIS    - history of the computation:
%            niter  - total inner-iteration count across all subproblems
%            cpus   - CPU seconds at each round
%            objf   - objective function values at each round
%            prjg   - projected gradient norms at each round
%            relerr - ratio of Frobenius norm of the residual to that of the samples
% Inputs:
%   dist_V - cell array containing the sample matrix of each site
%   Winit, Hinit, Rinit - initial values
%   LAMBDA - L1 regularizer weight on the outlier matrices
%   TOL    - tolerance for the stopping condition
%   TIMELIMIT, MAXITER - limits for stopping the iteration
% Written by Hafiz Imtiaz (hafiz.imtiaz@rutgers.edu), based on the ALSPGRAD code by Chih-Jen Lin
S = length(dist_V); % number of sites
% initialization
W = Winit;
dist_H = cell(S,1);
dist_R = cell(S,1);
for s = 1:S
    dist_H{s} = Hinit;
    dist_R{s} = Rinit;
end
K = size(W,2);
lambda = LAMBDA;
tol = TOL;
maxiter = MAXITER;
timelimit = TIMELIMIT;
%% distributed NMF
flag = true;
iter = 0;
M = 1; % box constraint on the outlier entries: |R| <= M
initgradW = 0;
initgradH = 0;
initgradR = 0;
objective_old = 0;
Vtest = [];
Htest = [];
Rtest = [];
for s = 1:S
    % gradients of the objective at the initial point, accumulated over sites
    Gs = W * (dist_H{s} * dist_H{s}') - (dist_V{s} - dist_R{s}) * dist_H{s}';
    initgradW = initgradW + Gs;
    initgradH = initgradH + norm((W'*W)*dist_H{s} - W'*(dist_V{s} - dist_R{s}),'fro')^2;
    initgradR = initgradR + norm(dist_R{s} - (dist_V{s} - W * dist_H{s}) + lambda * sign(dist_R{s}),'fro')^2;
    objective_old = objective_old + (1/2)*norm(dist_V{s} - W * dist_H{s} - dist_R{s}, 'fro')^2 + lambda * sum(sum(abs(dist_R{s})));
    % the following quantities are for debugging
    Vtest = [Vtest dist_V{s}];
    Htest = [Htest dist_H{s}];
    Rtest = [Rtest dist_R{s}];
end
initgradW = norm(initgradW,'fro');
initgrad = sqrt(initgradW^2 + initgradH + initgradR);
% per-subproblem tolerances, tightened adaptively when a subproblem converges too fast
tolW = max(0.001,tol)*initgrad;
tolH = tolW*ones(S,1);
tolR = 100*tol*ones(S,1);
% Historical information
HIS.niter = 0;
HIS.cpus = 0;
HIS.objf = objective_old;
HIS.prjg = initgrad;
HIS.relerr = norm(Vtest - W*Htest - Rtest,'fro') / norm(Vtest, 'fro');
elapse = cputime;
initt = cputime;
while flag && (iter <= maxiter)
    iter = iter + 1;
    gradW_HHt = 0;
    gradW_VHt = 0;
    % update H and R in local sites
    Htest = [];
    Rtest = [];
    tot_iter_HR = 0;
    projgradH = 0;
    projgradR = 0;
    for s = 1:S
        % solving for local H
        [dist_H{s}, ~, iterH(s)] = myNLS_H(dist_V{s}, W, dist_H{s}, dist_R{s}, tolH(s), maxiter);
        if iterH(s) < 2
            tolH(s) = 0.1 * tolH(s);
        end
        % solving for local R
        [dist_R{s}, ~, iterR(s)] = myNLS_R(dist_V{s}, W, dist_H{s}, dist_R{s}, lambda, M, tolR(s), maxiter);
        % quantities sent to the central site
        gradH = (W'*W) * dist_H{s} - W' * (dist_V{s} - dist_R{s});
        gradR = dist_R{s} - (dist_V{s} - W * dist_H{s}) + lambda * sign(dist_R{s});
        projgradH = projgradH + norm(myProjGrad(dist_H{s}, gradH, 0, 1e10), 'fro')^2; % scalar
        projgradR = projgradR + norm(myProjGrad(dist_R{s}, gradR, -M, M), 'fro')^2; % scalar
        gradW_HHt = gradW_HHt + dist_H{s} * dist_H{s}'; % k-by-k matrix
        gradW_VHt = gradW_VHt + (dist_V{s} - dist_R{s}) * dist_H{s}'; % d-by-k matrix
        % book keeping: add only this site's counts
        % (BUGFIX: the original added sum(iterH)+sum(iterR) inside the site
        % loop, over-counting inner iterations in HIS.niter)
        tot_iter_HR = tot_iter_HR + iterH(s) + iterR(s);
        Htest = [Htest dist_H{s}];
        Rtest = [Rtest dist_R{s}];
    end
    % update W at central site (solved in transposed form)
    [W,gradW,iterW] = myNLS_W(gradW_VHt', gradW_HHt', W', tolW, maxiter);
    W = W';
    gradW = gradW';
    % project each column of W onto the unit ball to keep scaling bounded
    for k = 1:K
        W(:,k) = W(:,k) / max( 1 , norm(W(:,k)) );
    end
    if iterW < 2
        tolW = 0.1 * tolW;
    end
    % Output running details
    HIS.niter = HIS.niter + tot_iter_HR + iterW;
    HIS.cpus = [HIS.cpus, cputime-elapse];
    projnorm = sqrt(norm(gradW(gradW < 0 | W > 0))^2 + projgradH + projgradR);
    objf = (1/2) * norm(Vtest - W*Htest - Rtest,'fro')^2 + lambda * sum(sum(abs(Rtest)));
    HIS.objf = [HIS.objf, objf];
    HIS.prjg = [HIS.prjg, projnorm];
    relerr = norm(Vtest - W*Htest - Rtest,'fro') / norm(Vtest, 'fro');
    HIS.relerr = [HIS.relerr,relerr];
    if rem(iter,10) == 0
        fprintf('%d:\tstopping criteria = %e,\tobjective value = %f.\n', iter,projnorm/initgrad,HIS.objf(end));
    end
    % stopping condition check
    if iterW >= 2
        if (projnorm <= tol*initgrad) || (cputime-initt > timelimit)
            flag = false;
        end
    end
end
elapse = cputime - elapse;
fprintf('\nFinal Iter = %d,\tFinal Elapse = %f.\n', iter,elapse);
%% helper functions
function [H,grad,iter] = myNLS_H(V, W, Hinit, R, tolH, maxiter)
% Projected-gradient solver for the local H subproblem:
%   min_{H >= 0} (1/2) || (V - R) - W*H ||_F^2
% Uses Armijo line search along the projection arc (Lin, 2007).
% Outputs: H - solution, grad - gradient at H, iter - outer iterations used.
H = Hinit;
WtW = W'*W;
% W'(V-R) is constant within this solve (V and R do not change here), so it
% is computed once; the original recomputed it on every outer iteration.
WtV = W'*(V - R);
grad = WtW*H - WtV;
alpha = 1; beta = 0.1; % initial step size and backtracking factor
for iter = 1:maxiter
    % search step size
    for inner_iter = 1:20
        Hn = max(H - alpha*grad, 0);
        d = Hn-H;
        gradd = sum(sum(grad.*d));
        dQd = sum(sum((WtW*d).*d));
        % sufficient-decrease test (sigma = 0.01): 0.99*g'd + 0.5*d'Qd < 0
        suff_decr = 0.99*gradd + 0.5*dQd < 0;
        if inner_iter == 1
            decr_alpha = ~suff_decr;
            Hp = H;
        end
        if decr_alpha
            % shrink alpha until sufficient decrease holds
            if suff_decr
                H = Hn;
                break;
            else
                alpha = alpha * beta;
            end
        else
            % grow alpha while decrease holds and the iterate keeps moving
            % (short-circuit || with isequal replaces the elementwise | on a
            % matrix comparison; the if-semantics are identical)
            if ~suff_decr || isequal(Hp, Hn)
                H = Hp;
                break;
            else
                alpha = alpha/beta;
                Hp = Hn;
            end
        end
    end
    grad = WtW * H - WtV;
    % stopping condition check on the projected gradient norm
    projgrad = norm(grad(grad < 0 | H >0));
    if projgrad < tolH
        break
    end
end
if iter==maxiter
    fprintf('Max iter in myNLS_H\n');
end
function y = mySoftThresh(x, lambda, M)
% Soft-thresholding operator with a box constraint (Renbo Zhao et al. 2016,
% "Online NMF with Outliers", eqn 17), applied elementwise:
%   y = sign(x) * min( max(|x| - lambda, 0), M )
% Entries with |x| < lambda are zeroed, larger entries are shrunk toward zero
% by lambda, and the magnitude of the result is clipped at M.
% x      - input matrix/vector
% lambda - penalty parameter (here threshold parameter)
% M      - bound of the constraint set of the outlier r, so |y| <= M
% BUGFIX: the original formula only handled the positive side (all negative
% inputs were mapped to 0) and returned 0 for inputs in the gap
% M < x < lambda + M; this closed form covers both signs and the full range.
y = sign(x) .* min(max(abs(x) - lambda, 0), M);
return
function [H,grad,iter] = myNLS_W(WtV, WtW, Hinit, tol, maxiter)
% Projected-gradient solver for the (transposed) central W subproblem:
%   min_{H >= 0} (1/2) tr(H'*WtW*H) - tr(H'*WtV)
% The caller passes the pre-aggregated sums sum_s H_s*H_s' (as WtW) and
% sum_s (V_s - R_s)*H_s' (as WtV), so no raw data is needed centrally.
% Outputs: H - solution, grad - gradient at H, iter - outer iterations used.
H = Hinit;
alpha = 1; beta = 0.1; % initial step size and backtracking factor
for iter = 1:maxiter
    grad = WtW*H - WtV;
    % stopping condition on the projected gradient norm
    projgrad = norm(grad(grad < 0 | H >0));
    if projgrad < tol
        break
    end
    % search step size (Armijo rule along the projection arc)
    for inner_iter = 1:20
        Hn = max(H - alpha*grad, 0);
        d = Hn-H;
        gradd = sum(sum(grad.*d));
        dQd = sum(sum((WtW*d).*d));
        suff_decr = 0.99*gradd + 0.5*dQd < 0;
        if inner_iter == 1
            decr_alpha = ~suff_decr;
            Hp = H;
        end
        if decr_alpha
            % shrink alpha until sufficient decrease holds
            if suff_decr
                H = Hn;
                break;
            else
                alpha = alpha * beta;
            end
        else
            % grow alpha while decrease holds and the iterate keeps moving
            % (short-circuit || with isequal replaces the elementwise | on a
            % matrix comparison; the if-semantics are identical)
            if ~suff_decr || isequal(Hp, Hn)
                H = Hp;
                break;
            else
                alpha = alpha/beta;
                Hp = Hn;
            end
        end
    end
end
if iter==maxiter
    fprintf('Max iter in myNLS_W\n');
end
function [R, grad, iter] = myNLS_R(V, W, H, Rinit, lambda, M, tol, maxiter)
% Solver for the local outlier subproblem (W, H fixed):
%   min_R (1/2) || V - W*H - R ||_F^2 + lambda * ||R||_1,  |R| <= M
% via the soft-thresholding operator, which is the exact proximal step.
% Outputs: R - estimated outlier matrix; grad - (sub)gradient at R;
%          iter - number of passes performed (folded into HIS.niter by caller).
% NOTE(review): W and H do not change inside the loop, so R is identical on
% every pass and the objective change is exactly zero from the second pass on;
% the loop therefore always exits after two iterations. Kept as-is because the
% caller counts the returned iter in its bookkeeping.
R = Rinit;
objective_old = (1/2) * norm(V - W*H - R, 'fro')^2 + lambda * sum(sum(abs(R)));
iter = 0;
flag = true;
while (iter <= maxiter) && flag
iter = iter + 1;
% proximal step: closed-form minimizer of the R subproblem
R = mySoftThresh(V - W*H, lambda, M);
% stopping condition check
objective_new = (1/2) * norm(V - W*H - R, 'fro')^2 + lambda * sum(sum(abs(R)));
if abs(objective_new - objective_old) < tol * objective_old
flag = false;
else
objective_old = objective_new;
end
end
% (sub)gradient of the objective at the returned R
grad = R - (V - W*H) + lambda*sign(R);
if iter==maxiter
fprintf('Max iter in nlssubprobR\n');
end
function pgd = myProjGrad(x, gradx, lLim, uLim)
% Projected gradient for box constraints lLim <= x <= uLim, elementwise:
% interior entries keep the full gradient; at the lower (upper) bound only the
% negative (positive) part of the gradient counts toward the stationarity norm.
% BUGFIX: the original used inclusive tests for the interior as well, so an
% entry sitting exactly on a bound was counted twice (e.g. it contributed
% 2*gradx at x == lLim with gradx < 0, which inflates the stopping norm for
% the active set at zero); the interior test is now strict.
pgd = gradx .* ((x > lLim) & (x < uLim)) + min(0, gradx) .* (x <= lLim) + max(0, gradx) .* (x >= uLim);
function [W, dist_H, dist_R, iter, elapse, HIS] = nmf_mydistneogm(dist_V, Winit, Hinit, Rinit, LAMBDA, TOL, TIMELIMIT, MAXITER)
% Distributed non-negative matrix factorization with outliers: NeNMF approach.
% Uses Nesterov optimal-gradient solvers for the local (H, R) subproblems and
% for the aggregated central update of the shared W.
% Outputs:
%   W      - estimated global W
%   dist_H - cell array containing the estimated H at each site
%   dist_R - cell array containing the estimated outlier matrix R at each site
%   iter   - number of top-level iterations (communication rounds)
%   elapse - total elapsed CPU time
%   HIS    - history of the computation:
%            niter  - total inner-iteration count across all subproblems
%            cpus   - CPU seconds at each round
%            objf   - objective function values at each round
%            prjg   - projected gradient norms at each round
%            relerr - ratio of Frobenius norm of the residual to that of the samples
% Inputs:
%   dist_V - cell array containing the sample matrix of each site
%   Winit, Hinit, Rinit - initial values
%   LAMBDA - L1 regularizer weight on the outlier matrices
%   TOL    - tolerance for the stopping condition
%   TIMELIMIT, MAXITER - limits for stopping the iteration
% Written by Hafiz Imtiaz (hafiz.imtiaz@rutgers.edu), based on the ALSPGRAD code by Chih-Jen Lin
S = length(dist_V); % number of sites
MinIter = 10; % minimum inner iterations before a subproblem may stop
% MaxTime = 100000;
% initialization
W = Winit;
dist_H = cell(S,1);
dist_R = cell(S,1);
for s = 1:S
    dist_H{s} = Hinit;
    dist_R{s} = Rinit;
end
K = size(W,2);
lambda = LAMBDA;
tol = TOL;
maxiter = MAXITER;
timelimit = TIMELIMIT;
%% distributed NMF
flag = true;
iter = 0;
M = 1; % box constraint on the outlier entries: |R| <= M
initgradW = 0;
initgradH = 0;
initgradR = 0;
objective_old = 0;
Vtest = [];
Htest = [];
Rtest = [];
for s = 1:S
    % gradients of the objective at the initial point, accumulated over sites
    Gs = W * (dist_H{s} * dist_H{s}') - (dist_V{s} - dist_R{s}) * dist_H{s}';
    initgradW = initgradW + Gs;
    initgradH = initgradH + norm((W'*W)*dist_H{s} - W'*(dist_V{s} - dist_R{s}),'fro')^2;
    initgradR = initgradR + norm(dist_R{s} - (dist_V{s} - W * dist_H{s}) + lambda * sign(dist_R{s}),'fro')^2;
    objective_old = objective_old + (1/2)*norm(dist_V{s} - W * dist_H{s} - dist_R{s}, 'fro')^2 + lambda * sum(sum(abs(dist_R{s})));
    % the following quantities are for debugging
    Vtest = [Vtest dist_V{s}];
    Htest = [Htest dist_H{s}];
    Rtest = [Rtest dist_R{s}];
end
initgradW = norm(initgradW,'fro');
initgrad = sqrt(initgradW^2 + initgradH + initgradR);
% per-subproblem tolerances, tightened adaptively when a subproblem converges too fast
tolW = max(0.001,tol)*initgrad;
tolH = tol*ones(S,1);
% tolR = tol*ones(S,1);
% Historical information
HIS.niter = 0;
HIS.cpus = 0;
HIS.objf = objective_old;
HIS.prjg = initgrad;
HIS.relerr = norm(Vtest - W*Htest - Rtest,'fro') / norm(Vtest, 'fro');
elapse = cputime;
initt = cputime;
while flag && (iter <= maxiter)
    iter = iter + 1;
    gradW_HHt = 0;
    gradW_VHt = 0;
    % update H and R in local sites
    Htest = [];
    Rtest = [];
    tot_iter_HR = 0;
    projgradH = 0;
    projgradR = 0;
    for s = 1:S
        % update H and R at local site
        [dist_H{s}, dist_R{s}, iterHR(s)] = myNNLS(dist_V{s}, W, dist_H{s}, dist_R{s}, lambda, MinIter, maxiter, tolH(s));
        if iterHR(s) < MinIter
            tolH(s) = 0.1 * tolH(s);
        end
        % quantities sent to the central site
        gradH = (W'*W) * dist_H{s} - W' * (dist_V{s} - dist_R{s});
        gradR = dist_R{s} - (dist_V{s} - W * dist_H{s}) + lambda * sign(dist_R{s});
        projgradH = projgradH + norm(myProjGrad(dist_H{s}, gradH, 0, 1e10), 'fro')^2; % scalar
        projgradR = projgradR + norm(myProjGrad(dist_R{s}, gradR, -M, M), 'fro')^2; % scalar
        gradW_HHt = gradW_HHt + dist_H{s} * dist_H{s}'; % k-by-k matrix
        gradW_VHt = gradW_VHt + (dist_V{s} - dist_R{s}) * dist_H{s}'; % d-by-k matrix
        % book keeping: add only this site's count
        % (BUGFIX: the original added sum(iterHR) inside the site loop,
        % over-counting inner iterations in HIS.niter)
        tot_iter_HR = tot_iter_HR + iterHR(s);
        Htest = [Htest dist_H{s}];
        Rtest = [Rtest dist_R{s}];
    end
    % Optimize W at central site (solved in transposed form)
    [W, iterW, gradW] = NNLS(W', gradW_HHt, gradW_VHt', MinIter, maxiter, tolW);
    W = W';
    gradW = gradW';
    % project each column of W onto the unit ball to keep scaling bounded
    for k = 1:K
        W(:,k) = W(:,k) / max( 1 , norm(W(:,k)) );
    end
    if iterW < MinIter
        tolW = tolW/10;
    end
    % Output running details
    HIS.niter = HIS.niter + tot_iter_HR + iterW;
    HIS.cpus = [HIS.cpus, cputime-elapse];
    projnorm = sqrt(norm(gradW(gradW < 0 | W > 0))^2 + projgradH + projgradR);
    objf = (1/2) * norm(Vtest - W*Htest - Rtest,'fro')^2 + lambda * sum(sum(abs(Rtest)));
    HIS.objf = [HIS.objf, objf];
    HIS.prjg = [HIS.prjg, projnorm];
    relerr = norm(Vtest - W*Htest - Rtest,'fro') / norm(Vtest, 'fro');
    HIS.relerr = [HIS.relerr,relerr];
    if rem(iter,10) == 0
        fprintf('%d:\tstopping criteria = %e,\tobjective value = %f.\n', iter,projnorm/initgrad,HIS.objf(end));
    end
    % stopping condition check
    if iterW >= MinIter
        if (projnorm <= tol*initgrad) || (cputime-initt > timelimit)
            flag = false;
        end
    end
end
elapse = cputime - elapse;
fprintf('\nFinal Iter = %d,\tFinal Elapse = %f.\n', iter,elapse);
%% helper functions
function [H,R,iter,Grad] = myNNLS(V,W,Z,R,lambda,iterMin,iterMax,tol)
% Nesterov-accelerated solver for the joint local (H, R) subproblem:
%   min_{H >= 0, |R| <= M} (1/2)||V - W*H - R||_F^2 + lambda*||R||_1
% H is updated with a NeNMF-style optimal-gradient step from the extrapolated
% point Z, and R with a soft-thresholding step, alternating once per iteration.
% Outputs: H, R - estimates; iter - iterations used; Grad - gradient w.r.t. H.
WtW = W'*W;
if ~issparse(WtW)
L = norm(WtW); % Lipschitz constant
else
L = norm(full(WtW)); % norm() needs a full matrix for sparse input
end
H = Z; % Initialization
WtV = W'*(V-R);
Grad = WtW*H - WtV; % Gradient
GradR = R - (V - W*H) + lambda*sign(R);
initgrad = sqrt(norm(Grad, 'fro')^2 + norm(GradR, 'fro')^2);
alpha1 = 1; M = 1; % momentum coefficient and outlier bound |R| <= M
for iter = 1:iterMax
% update H
H0 = H;
H = max(Z - Grad/L,0); % Calculate sequence 'Y'
alpha2 = 0.5*(1 + sqrt(1 + 4*alpha1^2));
Z = H + ((alpha1-1)/alpha2)*(H-H0); % extrapolated (momentum) point
alpha1 = alpha2;
% update R by soft-thresholding of the current residual
R = mySoftThresh(V - W*H, lambda, 1);
GradR = R - (V - W*H) + lambda*sign(R);
WtV = W'*(V-R); % R changed, so refresh W'(V-R) before the next H gradient
Grad = WtW * Z - WtV; % gradient at the extrapolated point Z
% Stopping criteria
if iter >= iterMin
% Lin's stopping condition
projnorm = sqrt( norm(myProjGrad(Z, Grad, 0, 1e10), 'fro')^2 + norm(myProjGrad(R, GradR, -M, M), 'fro')^2 );
if (projnorm <= tol*initgrad)
break;
end
end
end
% return the gradient at the feasible iterate H (not the extrapolated Z)
Grad = WtW * H - WtV;
return;
function y = mySoftThresh(x, lambda, M)
% Soft-thresholding operator with a box constraint (Renbo Zhao et al. 2016,
% "Online NMF with Outliers", eqn 17), applied elementwise:
%   y = sign(x) * min( max(|x| - lambda, 0), M )
% Entries with |x| < lambda are zeroed, larger entries are shrunk toward zero
% by lambda, and the magnitude of the result is clipped at M.
% x      - input matrix/vector
% lambda - penalty parameter (here threshold parameter)
% M      - bound of the constraint set of the outlier r, so |y| <= M
% BUGFIX: the original formula only handled the positive side (all negative
% inputs were mapped to 0) and returned 0 for inputs in the gap
% M < x < lambda + M; this closed form covers both signs and the full range.
y = sign(x) .* min(max(abs(x) - lambda, 0), M);
return
function [H,iter,Grad] = NNLS(Z,WtW,WtV,iterMin,iterMax,tol)
% Nesterov optimal-gradient solver for the nonnegative least-squares problem
%   min_{H >= 0} (1/2) tr(H'*WtW*H) - tr(H'*WtV).
% Z        - starting point (doubles as the extrapolation sequence)
% WtW, WtV - precomputed Gram matrix and cross-product
% iterMin  - minimum iterations before the stopping rule is checked
% iterMax  - iteration cap; tol - projected-gradient tolerance
% Returns the projected iterate H, the iteration count, and the gradient at H.
if issparse(WtW)
    stepScale = norm(full(WtW)); % norm() needs a full matrix for sparse input
else
    stepScale = norm(WtW); % Lipschitz constant of the gradient
end
H = Z;
Grad = WtW*Z - WtV;
tPrev = 1; % Nesterov momentum parameter t_k
for iter = 1:iterMax
    Hprev = H;
    % projected gradient step taken from the extrapolated point
    H = max(Z - Grad/stepScale, 0);
    tCurr = 0.5*(1 + sqrt(1 + 4*tPrev^2));
    % momentum (extrapolation) update
    Z = H + ((tPrev - 1)/tCurr)*(H - Hprev);
    tPrev = tCurr;
    Grad = WtW*Z - WtV;
    % Lin's projected-gradient stopping rule, active after iterMin passes
    if iter >= iterMin
        if norm(Grad(Grad < 0 | Z > 0)) <= tol
            break;
        end
    end
end
% gradient at the feasible iterate H (not the extrapolated Z)
Grad = WtW*H - WtV;
return;
function pgd = myProjGrad(x, gradx, lLim, uLim)
% Projected gradient for box constraints lLim <= x <= uLim, elementwise:
% interior entries keep the full gradient; at the lower (upper) bound only the
% negative (positive) part of the gradient counts toward the stationarity norm.
% BUGFIX: the original used inclusive tests for the interior as well, so an
% entry sitting exactly on a bound was counted twice (e.g. it contributed
% 2*gradx at x == lLim with gradx < 0, which inflates the stopping norm for
% the active set at zero); the interior test is now strict.
pgd = gradx .* ((x > lLim) & (x < uLim)) + min(0, gradx) .* (x <= lLim) + max(0, gradx) .* (x >= uLim);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment