% This function solves the Weighted Sparse Simplex Representation (WSSR)
% problem through Projected Gradient Descent (PGD). We first solve the
% subproblem of WSSR analytically to obtain \beta_0, then we project
% \beta_0 onto the probability simplex to obtain \beta_1. We use \beta_1
% as the initial solution vector for the PGD algorithm.
%%%% Inputs:
% X: N by P data matrix.
% k: the number of nearest neighbors to consider.
% rhos: the l1 penalty parameter -- either a scalar or a vector of length N.
% normalize: 1 or 0 depending on whether to normalize the data or not.
% ss: the initial step size -- we use backtracking line search.
% MaxIter: the maximum number of iterations to run PGD.
% thr: PGD stops early once the objective value falls below this threshold.
%%%% Outputs:
% W: the N by N coefficient matrix.
% obj_stars: a vector of length N whose entries contain the best objective
% function value attained for each point.
% obj_mat: an N by MaxIter matrix recording the objective function value
% over the PGD iterations for every point.
% Last edited: 15 Apr. 2020
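%
% Example usage (a minimal sketch; the parameter values below are
% illustrative only, and X stands for any N by P data matrix):
%   [W, obj_stars] = WSSR_PGD_euclid(X, 10, 0.01, 1, 1, 100, 1e-4);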
function [W, obj_stars, obj_mat] = WSSR_PGD_euclid(X, k, rhos, normalize, ss, MaxIter, thr)
% fill in default parameter values before they are used below
if nargin < 4
    normalize = 1;
end
if nargin < 5
    ss = 1; % default initial step size
end
if nargin < 6
    MaxIter = 100;
end
if nargin < 7
    thr = 1e-4;
end
N = size(X, 1);
W = zeros(N);
obj_stars = zeros(N, 1);
obj_mat = zeros(N, MaxIter);
epsilon = 1e-4; % small constant: ridge term in the subproblem and distance floor
beta = 0.8;     % backtracking line search: step size shrinkage factor
alpha = 0.3;    % backtracking line search: sufficient decrease parameter
if normalize == 1
    X0 = X;
    X = norml2(X0, 1); % normalize the data (norml2 is an external helper assumed to be on the path)
end
if length(rhos) == 1
    rhos = ones(N, 1)*rhos; % use the same penalty parameter for every point
end
%%
for i = 1:N
    rho = rhos(i);

    %% calculate the Euclidean distances to all other points
    yopt = X(i,:)';
    dists_sq = sum((repmat(yopt', N, 1) - X).^2, 2);
    dists = sqrt(dists_sq);
    dists(i) = Inf; % don't choose the point itself as one of its neighbors
    [vals, inds] = sort(dists, 'ascend');
    nn = inds(1:k);               % indices of the k nearest neighbors
    dk = max(vals(1:k), epsilon); % floor the distances at epsilon to avoid zero weights
    D = diag(dk);
    Ynew = X(nn,:)'; % P x k matrix of the k nearest neighbors

    %% solve a system of linear equations for the subproblem
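    % The subproblem keeps the sum-to-one constraint but drops nonnegativity,
    % so it has a closed-form solution: the KKT conditions of
    %   min_beta 0.5*||yopt - Ynew*beta||^2 + rho*dk'*beta + 0.5*epsilon*||D*beta||^2
    %   s.t.     sum(beta) = 1
    % give the (k+1) by (k+1) linear system A*[beta; lambda] = B below, where
    % lambda is the Lagrange multiplier of the equality constraint.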
    a = Ynew'*Ynew + epsilon.*D'*D;
    b = ones(k, 1);
    A = [a, b; b', 0];
    B = [Ynew'*yopt - rho*D*b; 1];
    beta_le = linsolve(A, B);         % solve the system of linear equations
    beta_cur = beta_le(1:k);          % \beta_0 (drop the Lagrange multiplier)
    beta_cur = SimplexProj(beta_cur); % \beta_1: project \beta_0 onto the probability simplex
    %% Projected Gradient Descent (PGD)
    betas = [];
    iter = 1;
    while iter <= MaxIter
        % calculate the gradient of the (unconstrained) objective
        g = -Ynew'*yopt + Ynew'*Ynew*beta_cur + rho.*diag(D) + epsilon.*D'*D*beta_cur;
        % gradient update step
        beta1 = beta_cur - ss.*g;
        % backtracking line search: shrink the step size until the Armijo
        % sufficient decrease condition is satisfied
        left = ObjVal(yopt, Ynew, beta1, D, rho);
        right = ObjVal(yopt, Ynew, beta_cur, D, rho) - alpha*ss*norm(g)^2;
        while left > right
            ss = beta*ss;
            beta1 = beta_cur - ss.*g;
            left = ObjVal(yopt, Ynew, beta1, D, rho);
            right = ObjVal(yopt, Ynew, beta_cur, D, rho) - alpha*ss*norm(g)^2;
        end
        % gradient update step (using the updated step size)
        beta1 = beta_cur - ss.*g;
        % projection onto the probability simplex
        beta_cur = SimplexProj(beta1);
        betas(iter,:) = beta_cur;
        % record the current objective function value for this point
        obj = ObjVal(yopt, Ynew, beta_cur, D, rho);
        obj_mat(i,iter) = obj;
        % stop early once the objective value falls below the threshold
        if obj < thr
            obj_mat(i,iter:end) = obj;
            break
        end
        iter = iter + 1;
    end

    % keep the solution from the iteration with the smallest objective value
    [obj_stars(i), id] = min(obj_mat(i,:));
    W(i,nn) = betas(id,:);
end
end
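
%% ------------------------------------------------------------------------
% The two helpers below are minimal sketches reconstructed from how they are
% called above, NOT the repository's own implementations. If the repository's
% SimplexProj.m and ObjVal.m are on the MATLAB path, delete these local
% functions so that those versions are used instead.

function obj = ObjVal(y, Y, beta, D, rho)
% Objective value of the per-point WSSR subproblem, matching the gradient
% used in the PGD loop. The ridge constant is assumed to equal the epsilon
% used in the main function.
epsilon = 1e-4;
obj = 0.5*norm(y - Y*beta)^2 + rho*diag(D)'*beta + 0.5*epsilon*norm(D*beta)^2;
end

function x = SimplexProj(y)
% Euclidean projection of a vector onto the probability simplex
% {x : x >= 0, sum(x) = 1}, using the standard sorting-based algorithm.
y = y(:);
u = sort(y, 'descend');
css = cumsum(u);
idx = find(u - (css - 1)./(1:numel(u))' > 0, 1, 'last');
tau = (css(idx) - 1)/idx;
x = max(y - tau, 0);
end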