Commit 3d09fb3d by Hafiz Imtiaz

Initial commit. Contains functions for ALSPGD, BPGD, OPGD, NeNMF, Proposed Algo1, and Proposed Algo2.

function [W,H, iter, elapse, HIS] = nmf_alspgd(V,Winit,Hinit,tol,timelimit,maxiter)
% NMF by alternating non-negative least squares using projected gradients
% Reference
% Chih-Jen Lin. 2007. Projected Gradient Methods for Nonnegative Matrix
% Factorization. Neural Comput. 19, 10 (October 2007), 2756-2779.
% DOI=http://dx.doi.org/10.1162/neco.2007.19.10.2756
% W , H = output solution
% Winit , Hinit = initial solution
% tol = tolerance for a relative stopping condition
% timelimit , maxiter = limit of time and iterations
% Edit: additional outputs by Hafiz Imtiaz
% HIS : (debugging purposes) history of computation,
% niter - total number of inner iterations spent in the
% projected-gradient subproblems,
% cpus - CPU seconds at iteration rounds,
% objf - objective function values at iteration rounds,
% prjg - projected gradient norm at iteration rounds,
% relerr - relative error at each top-level iteration
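%
% Example usage (a minimal sketch; the data, rank, and parameter values
% below are hypothetical, not part of the original code):
%   V = rand(100, 200);                          % nonnegative data matrix
%   k = 10;                                      % factorization rank
%   Winit = rand(100, k); Hinit = rand(k, 200);
%   [W, H, iter, elapse, HIS] = nmf_alspgd(V, Winit, Hinit, 1e-4, 60, 500);
%   semilogy(HIS.cpus, HIS.relerr)               % relative error vs. CPU time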
W = Winit;
H = Hinit;
initt = cputime;
gradW = W*(H*H') - V*H';
gradH = (W'*W)*H - W'*V;
initgrad = norm([gradW; gradH'],'fro');
fprintf('Init gradient norm %f\n', initgrad);
tolW = max(0.001,tol)*initgrad;
tolH = tolW;
% Historical information
HIS.niter = 0;
HIS.cpus = 0;
HIS.objf = (1/2) * norm(V - W*H,'fro')^2;
HIS.prjg = initgrad;
HIS.relerr = norm(V - W*H,'fro') / norm(V, 'fro');
elapse = cputime;
for iter = 1:maxiter
[W, gradW, iterW] = nlssubprob(V',H',W', tolW, 1000);
W = W';
gradW = gradW';
if iterW == 1
tolW = 0.1 * tolW;
end
[H, gradH, iterH] = nlssubprob(V, W, H, tolH, 1000);
if iterH == 1
tolH = 0.1 * tolH;
end
HIS.niter = HIS.niter + iterH + iterW;
% stopping condition: norm of the projected gradient (Lin 2007); only
% entries where the gradient is negative or the variable is strictly
% positive contribute
projnorm = norm([gradW(gradW < 0 | W > 0); gradH(gradH < 0 | H > 0)]);
% Output running details
objf = (1/2) * norm(V - W*H,'fro')^2;
HIS.objf = [HIS.objf, objf];
HIS.cpus = [HIS.cpus, cputime-elapse];
HIS.prjg = [HIS.prjg, projnorm];
relerr = norm(V - W*H,'fro') / norm(V, 'fro');
HIS.relerr = [HIS.relerr,relerr];
if rem(iter,10) == 0
fprintf('%d:\tstopping criteria = %e,\tobjective value = %f.\n', iter,projnorm/initgrad,HIS.objf(end));
end
if (projnorm < tol*initgrad) || (cputime-initt > timelimit)
break
end
end
elapse = cputime - elapse;
fprintf('\nFinal Iter = %d,\tFinal Elapse = %f.\n', iter,elapse);
% helper function: projected-gradient solver for the non-negative least squares subproblem
function [H,grad,iter] = nlssubprob(V,W,Hinit,tol,maxiter)
% H , grad = output solution and gradient
% iter = #iterations used
% V , W = constant matrices
% Hinit = initial solution
% tol = stopping tolerance
% maxiter = limit of iterations
H = Hinit;
WtV = W'*V;
WtW = W'*W;
alpha = 1; beta = 0.1;
for iter = 1:maxiter
grad = WtW*H - WtV;
projgrad = norm(grad(grad < 0 | H > 0));
if projgrad < tol
break
end
% search step size
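% Backtracking line search along the projection arc (Lin 2007): for the
% quadratic objective, f(Hn) - f(H) = <grad,d> + 0.5*<d, WtW*d>, so the
% sufficient-decrease test f(Hn) - f(H) <= sigma*<grad,d> with sigma = 0.01
% becomes the '0.99*gradd + 0.5*dQd < 0' check below; alpha shrinks by beta
% on failure and grows by 1/beta while the test keeps passing.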
for inner_iter = 1:20
Hn = max(H - alpha*grad, 0);
d = Hn-H;
gradd = sum(sum(grad.*d));
dQd = sum(sum((WtW*d).*d));
suff_decr = 0.99*gradd + 0.5*dQd < 0;
if inner_iter == 1
decr_alpha = ~suff_decr;
Hp = H;
end
if decr_alpha
if suff_decr
H = Hn;
break;
else
alpha = alpha * beta;
end
else
if ~suff_decr || isequal(Hp, Hn)
H = Hp;
break;
else
alpha = alpha/beta;
Hp = Hn;
end
end
end
end
if iter == maxiter
fprintf('Max iter in nlssubprob\n');
end
function [W, H, R, iter, elapse, HIS] = nmf_bpgd(V, Winit, Hinit, Rinit, lambda, tol, maxiter)
% Batch PGD algorithm
% Reference
% Zhao, Renbo; Tan, Vincent Y. F., 2016.
% Online Nonnegative Matrix Factorization With Outliers
% IEEE Transactions on Signal Processing, vol. 65, issue 3, pp. 555-570
% W , H , R = output solution
% iter = required iterations for solving
% elapse : CPU time in seconds.
% HIS : history of computation,
% niter - total number of inner iterations spent in the sub-problems,
% cpus - CPU seconds at iteration rounds,
% objf - objective function values at iteration rounds,
% relerr - relative error at each top-level iteration
% V = data matrix, samples in columns
% lambda = penalty parameter for outlier regularization
% Winit , Hinit, Rinit = initial solution
% tol = tolerance for a relative stopping condition (section 7-B)
% maxiter = limit of iterations
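%
% Example usage (a minimal sketch; sizes, rank, and lambda below are
% hypothetical):
%   V = rand(100, 200);                          % data, samples in columns
%   k = 10;
%   Winit = rand(100, k); Hinit = rand(k, 200); Rinit = zeros(100, 200);
%   [W, H, R, iter, elapse, HIS] = nmf_bpgd(V, Winit, Hinit, Rinit, 0.1, 1e-4, 500);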
[~, K] = size(Winit);
M = 1;    % bound on outlier magnitudes (constraint set of R)
W = Winit;
H = Hinit;
R = Rinit;
objective_old = (1/2)*norm(V - W*H - R, 'fro')^2 + lambda*sum(sum(abs(R)));
flag = true;
iter = 0;
% Historical information
HIS.niter = 0;
HIS.cpus = 0;
HIS.objf = (1/2) * norm(V - W*H - R,'fro')^2 + lambda * sum(sum(abs(R)));
HIS.relerr = norm(V - W*H - R,'fro') / norm(V, 'fro');
elapse = cputime;
while (iter <= maxiter) && flag
iter = iter + 1;
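% Step sizes are 0.7/L, where L upper-bounds the Lipschitz constant of the
% corresponding partial gradient: norm(W)^2 = ||W'*W||_2 for the H update,
% and ||H*H'||_F >= ||H*H'||_2 for the W update.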
eta_H = 0.7/norm(W)^2;
H = max(0,H - eta_H * W' * (W*H + R - V));
R = mySoftThresh(V - W*H, lambda, M);
eta_W = 0.7/norm(H*H', 'fro');
grad_W = W * (H*H') - (V-R)*H';
W = max(0,W - eta_W * grad_W);
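% Project each column of W onto the unit ball (the constraint set on the
% basis in Zhao & Tan 2016)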
for k = 1:K
W(:,k) = W(:,k) / max( 1 , norm(W(:,k)) );
end
% stopping condition check
objective_new = (1/2)*norm(V - W*H - R, 'fro')^2 + lambda*sum(sum(abs(R)));
if abs(objective_new - objective_old) < tol * objective_old
flag = false;
else
objective_old = objective_new;
end
% Output running details
objf = (1/2) * norm(V - W*H - R,'fro')^2 + lambda * sum(sum(abs(R)));
HIS.objf = [HIS.objf, objf];
HIS.cpus = [HIS.cpus, cputime-elapse];
relerr = norm(V - W*H - R,'fro') / norm(V, 'fro');
HIS.relerr = [HIS.relerr,relerr];
if rem(iter,10) == 0
fprintf('%d:\tobjective value = %f.\n', iter,HIS.objf(end));
end
end
HIS.iter = iter;
elapse = cputime - elapse;
fprintf('\nFinal Iter = %d,\tFinal Elapse = %f.\n', iter,elapse);
% helper functions
function y = mySoftThresh(x, lambda, M)
% Implementation of the soft thresholding function defined in Renbo
% Zhao et al. 2016 "Online NMF with Outliers", eqn 17
% x - input vector of dimension F
% lambda - penalty parameter (here threshold parameter)
% M - parameter of the constraint set of the outlier r
% y - output vector of dimension F
% piecewise: y = 0 for |x| < lambda, sign(x)*(|x| - lambda) for
% lambda <= |x| <= lambda + M, and sign(x)*M for |x| > lambda + M
y = sign(x) .* min(max(abs(x) - lambda, 0), M);
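% Worked examples with lambda = 0.2, M = 1:
%   mySoftThresh( 0.5, 0.2, 1) =  0.3   (shrunk toward zero by lambda)
%   mySoftThresh( 0.1, 0.2, 1) =  0     (below the threshold)
%   mySoftThresh(-3.0, 0.2, 1) = -1     (clipped to the bound -M)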
return
function [W,H,R,iter, elapse, HIS] = nmf_myalg(V,Winit,Hinit,Rinit,lambda,tol,timelimit,maxiter)
% Code for non-negative Matrix Factorization with outliers
% Reference
% Chih-Jen Lin. 2007. Projected Gradient Methods for Nonnegative Matrix
% Factorization. Neural Comput. 19, 10 (October 2007), 2756-2779.
% DOI=http://dx.doi.org/10.1162/neco.2007.19.10.2756
% Model: V = WH + R
% V (d x n): data matrix including n samples in d-dimensions
% W (d x k): basis matrix including k bases in d-dimensions
% H (k x n): coefficients matrix including n encodings in k-dimensions
% R (d x n): outlier matrix including n outlier vectors in d-dimensions
% Written by Hafiz Imtiaz (hafiz.imtiaz@rutgers.edu), based on the ALSPGRAD code by Chih-Jen Lin
% <Inputs>
% V : input data matrix (d x n)
% Winit : (d x k) initial value for W.
% Hinit : (k x n) initial value for H.
% Rinit : (d x n) initial value for R.
% lambda : regularizer for outliers.
% tol : stopping tolerance.
% maxiter : maximum number of iterations.
% <Outputs>
% W : estimated basis matrix (d x k).
% H : estimated coefficients matrix (k x n).
% R : estimated outlier matrix (d x n).
% iter : number of top-level iterations.
% elapse : CPU time in seconds.
% HIS : history of computation,
% niter - total number of inner iterations spent in the sub-problems,
% cpus - CPU seconds at iteration rounds,
% objf - objective function values at iteration rounds,
% prjg - projected gradient norm at iteration rounds.
% relerr - relative error at each top-level iteration
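%
% Example usage (a minimal sketch; sizes, rank, lambda, and the time limit
% are hypothetical):
%   V = rand(100, 200);
%   k = 10;
%   Winit = rand(100, k); Hinit = rand(k, 200); Rinit = zeros(100, 200);
%   [W, H, R, iter, elapse, HIS] = nmf_myalg(V, Winit, Hinit, Rinit, 0.1, 1e-4, 60, 500);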
W = Winit;
H = Hinit;
R = Rinit;
M = 1;    % bound on outlier magnitudes (constraint set of R)
initt = cputime;
gradW = W*(H*H') - (V-R)*H';
gradH = (W'*W)*H - W'*(V-R);
gradR = R - (V - W*H) + lambda*sign(R);
initgrad = sqrt(norm(gradW, 'fro')^2 + ...
norm(gradH, 'fro')^2 + ...
norm(gradR, 'fro')^2);
fprintf('Init gradient norm %f\n', initgrad);
tolW = max(0.001,tol)*initgrad;
tolH = tolW;
tolR = 100*tol;
% Historical information
HIS.niter = 0;
HIS.cpus = 0;
HIS.objf = (1/2) * norm(V - W*H - R,'fro')^2 + lambda * sum(sum(abs(R)));
HIS.prjg = initgrad;
HIS.relerr = norm(V - W*H - R,'fro') / norm(V, 'fro');
elapse = cputime;
for iter = 1:maxiter
% solving for W with H and R fixed
[W, gradW, iterW] = nlssubprob(V',H',W', R', tolW, 1000);
W = W';
gradW = gradW';
[~, K] = size(W);
for k = 1:K
W(:,k) = W(:,k) / max( 1 , norm(W(:,k)) );
end
if iterW < 2
tolW = 0.1 * tolW;
end
% solving for H with W and R fixed
[H, gradH, iterH] = nlssubprob(V, W, H, R, tolH, 1000);
if iterH < 2
tolH = 0.1 * tolH;
end
% solving for R with W and H fixed
[R, gradR, iterR] = nlssubprobR(V, W, H, R, lambda, M, tolR, 1000);
HIS.niter = HIS.niter + iterH + iterW + iterR;
HIS.cpus = [HIS.cpus, cputime-elapse];
% stopping condition
projnorm = sqrt( norm(myProjGrad(H, gradH, 0, 1e10), 'fro')^2 + ...
norm(myProjGrad(W, gradW, 0, 1e10), 'fro')^2 + ...
norm(myProjGrad(R, gradR, -M, M), 'fro')^2);
% Output running details
objf = (1/2) * norm(V - W*H - R,'fro')^2 + lambda * sum(sum(abs(R)));
HIS.objf = [HIS.objf, objf];
HIS.prjg = [HIS.prjg, projnorm];
relerr = norm(V - W*H - R,'fro') / norm(V, 'fro');
HIS.relerr = [HIS.relerr,relerr];
if rem(iter,10) == 0
fprintf('%d:\tstopping criteria = %e,\tobjective value = %f.\n', iter,projnorm/initgrad,HIS.objf(end));
end
if (projnorm < tol*initgrad) || (cputime-initt > timelimit)
break
end
end
elapse = cputime - elapse;
fprintf('\nFinal Iter = %d,\tFinal Elapse = %f.\n', iter,elapse);
% helper functions: subproblem solvers for the alternating updates
function [H,grad,iter] = nlssubprob(V,W,Hinit,R,tol,maxiter)
% H , grad = output solution and gradient
% iter = #iterations used
% V , W = constant matrices
% Hinit = initial solution
% R = outlier matrix
% tol = stopping tolerance
% maxiter = limit of iterations
H = Hinit;
V = V - R;
WtV = W'*V;
WtW = W'*W;
alpha = 1; beta = 0.1;
for iter = 1:maxiter
grad = WtW*H - WtV;
projgrad = norm(grad(grad < 0 | H > 0));
if projgrad < tol
break
end
% search step size
for inner_iter = 1:20
Hn = max(H - alpha*grad, 0);
d = Hn-H;
gradd = sum(sum(grad.*d));
dQd = sum(sum((WtW*d).*d));
suff_decr = 0.99*gradd + 0.5*dQd < 0;
if inner_iter == 1
decr_alpha = ~suff_decr;
Hp = H;
end
if decr_alpha
if suff_decr
H = Hn;
break;
else
alpha = alpha * beta;
end
else
if ~suff_decr || isequal(Hp, Hn)
H = Hp;
break;
else
alpha = alpha/beta;
Hp = Hn;
end
end
end
end
if iter == maxiter
fprintf('Max iter in nlssubprob\n');
end
function pgd = myProjGrad(x, gradx, lLim, uLim)
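% Projected gradient for the box constraint lLim <= x <= uLim: interior
% entries keep their full gradient; at the lower bound only negative
% components count (the only feasible descent directions), and at the
% upper bound only positive ones.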
pgd = gradx .* ((x > lLim) & (x < uLim)) + min(0, gradx) .* (x <= lLim) + max(0, gradx) .* (x >= uLim);
function [R,grad,iter] = nlssubprobR(V,W,H,Rinit,lambda,M,tol,maxiter)
% R, grad = output solution and gradient
% iter = #iterations used
% V , W, H = constant matrices
% Rinit = initial solution
% tol = stopping tolerance
% M = algorithm parameter
% maxiter = limit of iterations
R = Rinit;
objective_old = (1/2) * norm(V - W*H - R,'fro')^2 + lambda * sum(sum(abs(R)));
iter = 0;
flag = true;
while (iter <= maxiter) && flag
iter = iter + 1;
R = mySoftThresh(V - W*H, lambda, M);
% stopping condition check
objective_new = (1/2) * norm(V - W*H - R,'fro')^2 + lambda * sum(sum(abs(R)));
if abs(objective_new - objective_old) < tol * objective_old
flag = false;
else
objective_old = objective_new;
end
end
grad = R - (V - W*H) + lambda*sign(R);
if iter == maxiter
fprintf('Max iter in nlssubprobR\n');
end
function y = mySoftThresh(x, lambda, M)
% Implementation of the soft thresholding function defined in Renbo
% Zhao et al. 2016 "Online NMF with Outliers", eqn 17
% x - input vector of dimension F
% lambda - penalty parameter (here threshold parameter)
% M - parameter of the constraint set of the outlier r
% y - output vector of dimension F
% piecewise: y = 0 for |x| < lambda, sign(x)*(|x| - lambda) for
% lambda <= |x| <= lambda + M, and sign(x)*M for |x| > lambda + M
y = sign(x) .* min(max(abs(x) - lambda, 0), M);
return
function [W,H,R,iter,elapse,HIS] = nmf_myneogm(V,W_INIT,H_INIT,R_INIT,LAMBDA,TOL,MAX_ITER)
% Code for non-negative Matrix Factorization with outliers
% Reference
% N. Guan, D. Tao, Z. Luo, and B. Yuan, "NeNMF: An Optimal Gradient Method
% for Non-negative Matrix Factorization", IEEE Transactions on Signal
% Processing, Vol. 60, No. 6, PP. 2882-2898, Jun. 2012. (DOI:
% 10.1109/TSP.2012.2190406)
% Model: V = WH + R
% V (d x n): data matrix including n samples in d-dimensions
% W (d x k): basis matrix including k bases in d-dimensions
% H (k x n): coefficients matrix including n encodings in k-dimensions
% R (d x n): outlier matrix including n outlier vectors in d-dimensions
% Written by Hafiz Imtiaz (hafiz.imtiaz@rutgers.edu), based on the NeNMF code by Naiyang Guan
% <Inputs>
% V : input data matrix (d x n)
% W_INIT : (d x k) initial value for W.
% H_INIT : (k x n) initial value for H.
% R_INIT : (d x n) initial value for R.
% LAMBDA : regularizer for outliers.
% TOL : stopping tolerance.
% MAX_ITER : maximum number of iterations.
% <Outputs>
% W : estimated basis matrix (d x k).
% H : estimated coefficients matrix (k x n).
% R : estimated outlier matrix (d x n).
% iter : number of top-level iterations.
% elapse : CPU time in seconds.
% HIS : history of computation,
% niter - total number of inner iterations spent in the sub-problems,
% cpus - CPU seconds at iteration rounds,
% objf - objective function values at iteration rounds,
% prjg - projected gradient norm at iteration rounds.
% relerr - relative error at each top-level iteration
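%
% Example usage (a minimal sketch; sizes, rank, and LAMBDA are hypothetical):
%   V = rand(100, 200);
%   k = 10;
%   Winit = rand(100, k); Hinit = rand(k, 200); Rinit = zeros(100, 200);
%   [W, H, R, iter, elapse, HIS] = nmf_myneogm(V, Winit, Hinit, Rinit, 0.1, 1e-5, 500);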
% Default setting
MaxIter = MAX_ITER;
MinIter = 10;
MaxTime = 100000;
W0 = W_INIT;
H0 = H_INIT;
R0 = R_INIT;
tol = TOL;
M = 1; % bound on outliers
ITER_MAX = 1000; % maximum inner iteration number for sub-iteration
ITER_MIN = 10; % minimum inner iteration number for sub-iteration
% Initialization
W = W0;
H = H0;
R = R0;
HVt = H*(V-R)';
HHt = H*H';
WtV = W'*(V-R);
WtW = W'*W;
GradW = W * HHt - HVt';
GradH = WtW * H - WtV;
GradR = R - (V - W*H) + LAMBDA*sign(R);
init_delta = sqrt(norm(GradW,'fro')^2 + norm(GradH,'fro')^2 + norm(GradR,'fro')^2);
tolW = max(tol,1e-3)*init_delta; % absolute stopping tolerance for the W sub-solver
tolHR = tol; % relative stopping tolerance for the (H,R) sub-solver
% Historical information
HIS.niter = 0;
HIS.cpus = 0;
HIS.objf = (1/2) * norm(V - W*H - R,'fro')^2 + LAMBDA * sum(sum(abs(R)));
HIS.prjg = init_delta;
HIS.relerr = norm(V - W*H - R,'fro') / norm(V, 'fro');
% Iterative updating
elapse = cputime;
W = W'; % keep W transposed (k x d) inside the loop so the NNLS solver can update it
for iter = 1:MaxIter
% Optimize H and R with W fixed
[H,R,iterH] = myNNLS(V,W',H,R,LAMBDA,ITER_MIN,ITER_MAX,tolHR);
if iterH < ITER_MIN
tolHR = tolHR/10; % tighten only the sub-solver tolerance, not the top-level tol
end
HHt = H*H';
HVt = H*(V-R)';
% Optimize W with H and R fixed
[W,iterW,GradW] = NNLS(W,HHt,HVt,ITER_MIN,ITER_MAX,tolW);
if iterW < ITER_MIN
tolW = tolW/10;
end
WtW = W*W';
WtV = W*(V-R);
GradH = WtW * H - WtV;
GradR = R - (V - W'*H) + LAMBDA*sign(R);
HIS.niter = HIS.niter + iterH + iterW;
HIS.cpus = [HIS.cpus, cputime-elapse];
delta = sqrt( norm(myProjGrad(H, GradH, 0, 1e10), 'fro')^2 + ...
norm(myProjGrad(W, GradW, 0, 1e10), 'fro')^2 + ...
norm(myProjGrad(R, GradR, -M, M), 'fro')^2);
% Output running details
objf = (1/2) * norm(V - W'*H - R,'fro')^2 + LAMBDA * sum(sum(abs(R)));
HIS.objf = [HIS.objf, objf];
HIS.prjg = [HIS.prjg, delta];
relerr = norm(V - W'*H - R,'fro') / norm(V, 'fro');
HIS.relerr = [HIS.relerr,relerr];
if rem(iter,10) == 0
fprintf('%d:\tstopping criteria = %e,\tobjective value = %f.\n', iter,delta/init_delta,HIS.objf(end));
end
% Stopping condition
if (delta <= tol*init_delta && iter >= MinIter) || HIS.cpus(end) >= MaxTime
break;
end
end
W = W'; % restore the original orientation (d x k)
elapse = cputime - elapse;
fprintf('\nFinal Iter = %d,\tFinal Elapse = %f.\n', iter,elapse);
return;
% Helper functions
function [H,iter,Grad] = NNLS(Z,WtW,WtV,iterMin,iterMax,tol)
if ~issparse(WtW)
L = norm(WtW); % Lipschitz constant
else
L = norm(full(WtW));
end
H = Z; % Initialization
Grad = WtW*Z - WtV; % Gradient
alpha1 = 1;
for iter = 1:iterMax
H0 = H;
H = max(Z - Grad/L,0); % Calculate sequence 'Y'
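% Nesterov's momentum: alpha_{k+1} = (1 + sqrt(1 + 4*alpha_k^2))/2 and the
% search point Z = H_k + ((alpha_k - 1)/alpha_{k+1})*(H_k - H_{k-1})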
alpha2 = 0.5*(1 + sqrt(1 + 4*alpha1^2));
Z = H + ((alpha1-1)/alpha2)*(H-H0);
alpha1 = alpha2;
Grad = WtW * Z - WtV;
% Stopping criteria
if iter >= iterMin
% Lin's stopping condition
pgn = norm(Grad(Grad < 0 | Z > 0));
if pgn <= tol
break;
end
end
end
Grad = WtW * H - WtV;
return;
function [H,R,iter,Grad] = myNNLS(V,W,Z,R,lambda,iterMin,iterMax,tol)
WtW = W'*W;
if ~issparse(WtW)
L = norm(WtW); % Lipschitz constant
else
L = norm(full(WtW));
end
H = Z; % Initialization
WtV = W'*(V-R);
Grad = WtW*H - WtV; % Gradient
GradR = R - (V - W*H) + lambda*sign(R);
initgrad = sqrt(norm(Grad, 'fro')^2 + norm(GradR, 'fro')^2);
alpha1 = 1; M = 1;
for iter = 1:iterMax
% update H
H0 = H;
H = max(Z - Grad/L,0); % Calculate sequence 'Y'
alpha2 = 0.5*(1 + sqrt(1 + 4*alpha1^2));
Z = H + ((alpha1-1)/alpha2)*(H-H0);
alpha1 = alpha2;
% update R
R = mySoftThresh(V - W*H, lambda, M);
GradR = R - (V - W*H) + lambda*sign(R);
WtV = W'*(V-R);
Grad = WtW * Z - WtV;
% Stopping criteria
if iter >= iterMin
% Lin's stopping condition
projnorm = sqrt( norm(myProjGrad(Z, Grad, 0, 1e10), 'fro')^2 + norm(myProjGrad(R, GradR, -M, M), 'fro')^2 );
if (projnorm <= tol*initgrad)
break;
end
end
end
Grad = WtW * H - WtV;
return;
function pgd = myProjGrad(x, gradx, lLim, uLim)
pgd = gradx .* ((x > lLim) & (x < uLim)) + min(0, gradx) .* (x <= lLim) + max(0, gradx) .* (x >= uLim);
function y = mySoftThresh(x, lambda, M)
% Implementation of the soft thresholding function defined in Renbo
% Zhao et al. 2016 "Online NMF with Outliers", eqn 17
% x - input vector of dimension F
% lambda - penalty parameter (here threshold parameter)
% M - parameter of the constraint set of the outlier r
% y - output vector of dimension F
% piecewise: y = 0 for |x| < lambda, sign(x)*(|x| - lambda) for
% lambda <= |x| <= lambda + M, and sign(x)*M for |x| > lambda + M
y = sign(x) .* min(max(abs(x) - lambda, 0), M);
return
function [W,H,iter,elapse,HIS] = nmf_neogm(V,W_INIT,H_INIT,TOL,MAX_ITER)
% Non-negative Matrix Factorization via Nesterov's Optimal Gradient Method.
% NeNMF: Matlab Code for Efficient NMF Solver
% Reference
% N. Guan, D. Tao, Z. Luo, and B. Yuan, "NeNMF: An Optimal Gradient Method
% for Non-negative Matrix Factorization", IEEE Transactions on Signal
% Processing, Vol. 60, No. 6, PP. 2882-2898, Jun. 2012. (DOI:
% 10.1109/TSP.2012.2190406)
% The model is V \approx WH, where V, W, and H are defined as follows:
% V (m x n-dimension): data matrix including n samples in m-dimension
% space;
% W (m x r-dimension): basis matrix including r bases in m-dimension space;
% H (r x n-dimension): coefficients matrix including n encodings in
% r-dimension space.
% Written by Naiyang Guan (ny.guan@gmail.com)
% Copyright 2010-2012 by Naiyang Guan and Dacheng Tao
% Modified at Sept. 25 2011
% Modified at Nov. 4 2012
% <Inputs>
% V : Input data matrix (m x n)
% W_INIT : (m x r) initial value for W.
% H_INIT : (r x n) initial value for H.
% TOL : Stopping tolerance. Default is 1e-5.
% MAX_ITER : Maximum number of iterations. Default is 1,000.