-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcomb_ucb1.py
95 lines (78 loc) · 2.34 KB
/
comb_ucb1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# -*- coding: utf-8 -*-
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
import fpl_rw as fpl
def comb_ucb1(l, s):
(t, dim) = l.shape
c = np.zeros(t)
u = np.zeros(dim)
# initialization
w1, t0 = init(l, s)
w = np.zeros((t-t0+2, dim))
w[0] = w1 # shift of 1 between here and notations of the article
counter = np.zeros((2, dim)) # we just need to store counters for times ti-1 (counter[0]) and ti (counter[1])
counter[0] = np.ones(dim)
for ti in range(t-t0+1):
loss = l[t0+ti-1]
# compute UCBs
for i in range(dim):
u[i] = w[counter[0][i]-1][i] - radius(t0+ti-1, counter[0][i]) # lower bound of the confidence interval
# solve the optimization problem
a = fpl.opt(s, u)
c[t0+ti-1] = np.dot(a, loss)
# update statistics
counter[1] = counter[0]
for i in range(dim):
if a[i] == 1:
counter[1][i] += 1
w[counter[1][i]-1][i] = (counter[0][i]*w[counter[0][i]-1][i] + loss[i])/counter[1][i]
counter[0] = counter[1]
return c, t0
def init(l, s):
dim = len(s[0])
w = np.zeros(dim)
u = np.ones(dim)
t = 1
while 1 in u:
a = fpl.opt(s, -u) # negative sign here because we minimize
for i in range(dim):
if a[i] == 1:
w[i] = l[t-1][i]
u[i] = 0
t += 1
return w, t
def radius(x, y):
return np.sqrt(1.5*np.log(x)/y)
if __name__ == "__main__":
T = 3000
d = 10
# m = d
m = 1
# S = s_generator(d, n)
S = np.eye(d)
L = 1/2*np.ones((T, d))
L[:, 0] = 0
L[100:, 0] = 1/4
# L = np.zeros((T, d))
L = np.random.rand(T, d)
num = 1
results = comb_ucb1(L, S)
res = results[0]
T0 = results[1]
regrets = fpl.expected_regrets(L, S, num, method="comb_ucb1")
print 'd = {}'.format(d)
print 'm = {}'.format(m)
t_vect = np.arange(T-T0+1) + T0 - 1
plt.figure()
plt.plot(t_vect, np.cumsum(res[T0-1:]))
plt.title("CombUCB1 algorithm")
plt.xlabel("time")
plt.ylabel("learner's cumulative loss")
plt.figure()
plt.title("CombUCB1 algorithm")
plt.plot(t_vect, regrets[0])
# plt.fill_between(t_vect, regrets[1], regrets[2], color='grey')
plt.xlabel("time")
plt.ylabel("regret")
plt.show()