Commit cee417a

Rank-1 NES algorithm
1 parent 87c7ac3 commit cee417a

File tree

2 files changed (+188, −1 lines)

Lines changed: 186 additions & 0 deletions
@@ -0,0 +1,186 @@
__author__ = 'Tom Schaul, Tobias Glasmachers'


from scipy import dot, array, randn, exp, floor, log, sqrt, ones, multiply, log2

from pybrain.tools.rankingfunctions import HansenRanking
from pybrain.optimization.distributionbased.distributionbased import DistributionBasedOptimizer


class Rank1NES(DistributionBasedOptimizer):
    """ Natural Evolution Strategies with rank-1 covariance matrices.

    See http://arxiv.org/abs/1106.1998 for a description. """

    # parameters, which can be set but have a good (adapted) default value
    centerLearningRate = 1.0
    scaleLearningRate = None
    covLearningRate = None
    batchSize = None
    uniformBaseline = True
    shapingFunction = HansenRanking()

    # fixed settings
    mustMaximize = True
    storeAllEvaluations = True
    storeAllEvaluated = True
    storeAllDistributions = True
    storeAllRates = True
    verboseGaps = 1
    initVariance = 1.
    varianceCutoff = 1e-20


    def _additionalInit(self):
        # heuristic settings
        if self.covLearningRate is None:
            self.covLearningRate = self._initLearningRate()
        if self.scaleLearningRate is None:
            self.scaleLearningRate = self.covLearningRate
        if self.batchSize is None:
            self.batchSize = self._initBatchSize()

        # other initializations
        self._center = self._initEvaluable.copy()
        self._logDetA = log(self.initVariance) / 2
        self._principalVector = randn(self.numParameters)
        self._principalVector /= sqrt(dot(self._principalVector, self._principalVector))
        self._allDistributions = [(self._center.copy(), self._principalVector.copy(), self._logDetA)]
        # note: the fixed values below override the heuristic settings chosen above
        self.covLearningRate = 0.1
        self.batchSize = int(max(5, max(4 * log2(self.numParameters), 0.2 * self.numParameters)))
        self.uniformBaseline = False
        self.scaleLearningRate = 0.1

    def _stoppingCriterion(self):
        if DistributionBasedOptimizer._stoppingCriterion(self):
            return True
        elif self._getMaxVariance < self.varianceCutoff:
            return True
        else:
            return False

    @property
    def _getMaxVariance(self):
        return exp(self._logDetA * 2 / self.numParameters)

    def _initLearningRate(self):
        return 0.6 * (3 + log(self.numParameters)) / self.numParameters / sqrt(self.numParameters)

    def _initBatchSize(self):
        return 4 + int(floor(3 * log(self.numParameters)))

    @property
    def _population(self):
        return [self._allEvaluated[i] for i in self._pointers]

    @property
    def _currentEvaluations(self):
        fits = [self._allEvaluations[i] for i in self._pointers]
        if self._wasOpposed:
            fits = map(lambda x: -x, fits)
        return fits

    def _produceSample(self):
        return randn(self.numParameters + 1)

    def _produceSamples(self):
        """ Append batch size new samples and evaluate them. """
        tmp = [self._sample2base(self._produceSample()) for _ in range(self.batchSize)]
        map(self._oneEvaluation, tmp)
        self._pointers = list(range(len(self._allEvaluated) - self.batchSize, len(self._allEvaluated)))

    def _notify(self):
        """ Provide some feedback during the run. """
        if self.verbose:
            if self.numEvaluations % self.verboseGaps == 0:
                print 'Step:', self.numLearningSteps, 'best:', self.bestEvaluation,
                print 'logVar', round(self._logDetA, 3),
                print 'log|vector|', round(log(dot(self._principalVector, self._principalVector)) / 2, 3)

        if self.listener is not None:
            self.listener(self.bestEvaluable, self.bestEvaluation)

    def _learnStep(self):
        # concatenations of y vector and z value
        samples = [self._produceSample() for _ in range(self.batchSize)]

        u = self._principalVector
        a = self._logDetA

        # unnamed in the paper: (y + z*u), i.e. x / exp(lambda)
        W = [s[:-1] + u * s[-1] for s in samples]
        points = [self._center + exp(a) * w for w in W]

        map(self._oneEvaluation, points)

        self._pointers = list(range(len(self._allEvaluated) - self.batchSize, len(self._allEvaluated)))

        utilities = self.shapingFunction(self._currentEvaluations)
        utilities /= sum(utilities)  # make the utilities sum to 1
        if self.uniformBaseline:
            utilities -= 1. / self.batchSize

        W = [w for i, w in enumerate(W) if utilities[i] != 0]
        utilities = [uw for uw in utilities if uw != 0]

        dim = self.numParameters

        r = sqrt(dot(u, u))
        v = u / r
        c = log(r)

        # inner products, but not scaled with exp(lambda)
        wws = array([dot(w, w) for w in W])
        wvs = array([dot(v, w) for w in W])
        wv2s = array([wv ** 2 for wv in wvs])

        dCenter = exp(self._logDetA) * dot(utilities, W)
        self._center += self.centerLearningRate * dCenter

        kp = ((r ** 2 - dim + 2) * wv2s - (r ** 2 + 1) * wws) / (2 * r * (dim - 1.))

        # natural gradient on lambda, equation (5)
        da = 1. / (2 * (dim - 1)) * dot((wws - dim) - (wv2s - 1), utilities)

        # natural gradient on u, equation (6)
        du = dot(kp, utilities) * v + dot(multiply(wvs / r, utilities), W)

        # equation (7)
        dc = dot(du, v) / r

        # equation (8)
        dv = du / r - dc * v

        epsilon = min(self.covLearningRate, 2 * sqrt(r ** 2 / dot(du, du)))
        if dc > 0:
            # additive update
            self._principalVector += epsilon * du
        else:
            # multiplicative update
            # prevents instability
            c += epsilon * dc
            v += epsilon * dv
            v /= sqrt(dot(v, v))
            r = exp(c)
            self._principalVector = r * v

        self._lastLogDetA = self._logDetA
        self._logDetA += self.scaleLearningRate * da

        if self.storeAllDistributions:
            self._allDistributions.append((self._center.copy(), self._principalVector.copy(), self._logDetA))


def test():
    """ Rank-1 NES easily solves high-dimensional Rosenbrock functions. """
    from pybrain.rl.environments.functions.unimodal import RosenbrockFunction
    dim = 40
    f = RosenbrockFunction(dim)
    x0 = -ones(dim)
    l = Rank1NES(f, x0, verbose=True, verboseGaps=500)
    l.learn()


if __name__ == '__main__':
    test()
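For reference (not part of the commit): the search distribution implied by the sampling in _learnStep is N(center, exp(2a) * (I + u u^T)), since each point is drawn as center + exp(a) * (y + z*u) with y ~ N(0, I) and z ~ N(0, 1). A minimal standalone numpy sketch checking that covariance empirically; all names and sizes here are illustrative only:

import numpy as np

# Each sample is center + exp(a) * (y + z*u) with y ~ N(0, I) and z ~ N(0, 1),
# so its covariance should be exp(2a) * (I + u u^T).
dim, a = 5, -0.3
u = np.random.randn(dim)
center = np.zeros(dim)
samples = np.array([center + np.exp(a) * (np.random.randn(dim) + np.random.randn() * u)
                    for _ in range(100000)])
empirical = np.cov(samples, rowvar=False)
expected = np.exp(2 * a) * (np.eye(dim) + np.outer(u, u))
print(np.abs(empirical - expected).max())  # small, and shrinks with more samples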

pybrain/tools/neuralnets.py

Lines changed: 2 additions & 1 deletion
@@ -208,7 +208,8 @@ def setupRNN(self, trainer=BackpropTrainer, hidden=None, **trnargs):
         self.hidden = hidden
         self._convertAllDataToOneOfMany()

-        RNN = buildNetwork(self.DS.indim, self.hidden, self.DS.outdim, hiddenclass=LSTMLayer, outclass=SoftmaxLayer)
+        RNN = buildNetwork(self.DS.indim, self.hidden, self.DS.outdim, hiddenclass=LSTMLayer,
+                           recurrent=True, outclass=SoftmaxLayer)
         logging.info("Constructing classification RNN with following config:")
         logging.info(str(RNN) + "\n Hidden units:\n " + str(self.hidden))
         logging.info("Trainer received the following special arguments:")
