__author__ = 'Tom Schaul, Tobias Glasmachers'


from scipy import dot, array, randn, exp, floor, log, sqrt, ones, multiply, log2

from pybrain.tools.rankingfunctions import HansenRanking
from pybrain.optimization.distributionbased.distributionbased import DistributionBasedOptimizer



class Rank1NES(DistributionBasedOptimizer):
    """ Natural Evolution Strategies with rank-1 covariance matrices.

    See http://arxiv.org/abs/1106.1998 for a description. """
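
    # The search distribution is a Gaussian with mean _center and the rank-1
    # perturbed covariance exp(2 * _logDetA) * (I + u u^T), where u is
    # _principalVector; as the sampling code below shows, points are drawn as
    #     x = _center + exp(_logDetA) * (y + z * u)
    # with y ~ N(0, I) and z ~ N(0, 1) (see _produceSample and _learnStep).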

    # parameters, which can be set but have a good (adapted) default value
    centerLearningRate = 1.0
    scaleLearningRate = None
    covLearningRate = None
    batchSize = None
    uniformBaseline = True
    shapingFunction = HansenRanking()

    # fixed settings
    mustMaximize = True
    storeAllEvaluations = True
    storeAllEvaluated = True
    storeAllDistributions = True
    storeAllRates = True
    verboseGaps = 1
    initVariance = 1.
    varianceCutoff = 1e-20


    def _additionalInit(self):
        # heuristic settings
        if self.covLearningRate is None:
            self.covLearningRate = self._initLearningRate()
        if self.scaleLearningRate is None:
            self.scaleLearningRate = self.covLearningRate
        if self.batchSize is None:
            self.batchSize = self._initBatchSize()

        # other initializations
        self._center = self._initEvaluable.copy()
        self._logDetA = log(self.initVariance) / 2
        self._principalVector = randn(self.numParameters)
        self._principalVector /= sqrt(dot(self._principalVector, self._principalVector))
        self._allDistributions = [(self._center.copy(), self._principalVector.copy(), self._logDetA)]
        # note: the following four assignments unconditionally override the
        # heuristic defaults computed above
        self.covLearningRate = 0.1
        self.batchSize = int(max(5, max(4 * log2(self.numParameters), 0.2 * self.numParameters)))
        self.uniformBaseline = False
        self.scaleLearningRate = 0.1

    def _stoppingCriterion(self):
        if DistributionBasedOptimizer._stoppingCriterion(self):
            return True
        elif self._getMaxVariance < self.varianceCutoff:
            return True
        else:
            return False

    @property
    def _getMaxVariance(self):
        # scale measure derived from _logDetA; the run stops once it
        # falls below varianceCutoff
        return exp(self._logDetA * 2 / self.numParameters)

    def _initLearningRate(self):
        return 0.6 * (3 + log(self.numParameters)) / self.numParameters / sqrt(self.numParameters)

    def _initBatchSize(self):
        return 4 + int(floor(3 * log(self.numParameters)))
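
    # For example (illustrative), with numParameters = 40 as in test() below:
    #     _initLearningRate() = 0.6 * (3 + log(40)) / 40 / sqrt(40) ~= 0.016
    #     _initBatchSize() = 4 + int(floor(3 * log(40))) = 4 + 11 = 15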

    @property
    def _population(self):
        return [self._allEvaluated[i] for i in self._pointers]

    @property
    def _currentEvaluations(self):
        fits = [self._allEvaluations[i] for i in self._pointers]
        if self._wasOpposed:
            fits = map(lambda x: -x, fits)
        return fits

    def _produceSample(self):
        # the concatenation of y ~ N(0, I) and the scalar z ~ N(0, 1)
        return randn(self.numParameters + 1)

    def _produceSamples(self):
        """ Append batch size new samples and evaluate them. """
        tmp = [self._sample2base(self._produceSample()) for _ in range(self.batchSize)]
        map(self._oneEvaluation, tmp)
        self._pointers = list(range(len(self._allEvaluated) - self.batchSize, len(self._allEvaluated)))

    def _notify(self):
        """ Provide some feedback during the run. """
        if self.verbose:
            if self.numEvaluations % self.verboseGaps == 0:
                print 'Step:', self.numLearningSteps, 'best:', self.bestEvaluation,
                print 'logVar', round(self._logDetA, 3),
                print 'log|vector|', round(log(dot(self._principalVector, self._principalVector)) / 2, 3)

        if self.listener is not None:
            self.listener(self.bestEvaluable, self.bestEvaluation)

    def _learnStep(self):
        # each sample is the concatenation of a y vector and a z value
        samples = [self._produceSample() for _ in range(self.batchSize)]

        u = self._principalVector
        a = self._logDetA

        # w = y + z * u (unnamed in the paper); equals (x - _center) / exp(lambda)
        W = [s[:-1] + u * s[-1] for s in samples]
        points = [self._center + exp(a) * w for w in W]

        map(self._oneEvaluation, points)

        self._pointers = list(range(len(self._allEvaluated) - self.batchSize, len(self._allEvaluated)))

        utilities = self.shapingFunction(self._currentEvaluations)
        utilities /= sum(utilities)  # make the utilities sum to 1
        if self.uniformBaseline:
            utilities -= 1. / self.batchSize

        # drop samples with zero utility: they do not contribute to the gradients
        W = [w for i, w in enumerate(W) if utilities[i] != 0]
        utilities = [uw for uw in utilities if uw != 0]

        dim = self.numParameters

        # decompose u into its length r and unit direction v; c = log(r)
        r = sqrt(dot(u, u))
        v = u / r
        c = log(r)

        # inner products, not yet scaled by exp(lambda)
        wws = array([dot(w, w) for w in W])
        wvs = array([dot(v, w) for w in W])
        wv2s = array([wv ** 2 for wv in wvs])

        # natural gradient on the center
        dCenter = exp(self._logDetA) * dot(utilities, W)
        self._center += self.centerLearningRate * dCenter

        # per-sample coefficients for the v-component of the gradient on u
        kp = ((r ** 2 - dim + 2) * wv2s - (r ** 2 + 1) * wws) / (2 * r * (dim - 1.))

        # natural gradient on lambda, equation (5)
        da = 1. / (2 * (dim - 1)) * dot((wws - dim) - (wv2s - 1), utilities)

        # natural gradient on u, equation (6)
        du = dot(kp, utilities) * v + dot(multiply(wvs / r, utilities), W)

        # equation (7)
        dc = dot(du, v) / r

        # equation (8)
        dv = du / r - dc * v

        # step size: the covariance learning rate, clipped at 2 * |u| / |du|
        epsilon = min(self.covLearningRate, 2 * sqrt(r ** 2 / dot(du, du)))
        if dc > 0:
            # additive update
            self._principalVector += epsilon * du
        else:
            # multiplicative update on log-length c and direction v;
            # prevents instability when the length of u is shrinking
            c += epsilon * dc
            v += epsilon * dv
            v /= sqrt(dot(v, v))
            r = exp(c)
            self._principalVector = r * v

        self._lastLogDetA = self._logDetA
        self._logDetA += self.scaleLearningRate * da

        if self.storeAllDistributions:
            self._allDistributions.append((self._center.copy(), self._principalVector.copy(), self._logDetA))


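# Illustrative sketch (not part of the original module): empirically checks
# that sampling as in _learnStep, x = center + exp(a) * (y + z*u), yields the
# rank-1 covariance exp(2*a) * (I + u u^T). The function name and its default
# arguments are hypothetical, chosen for demonstration only.
def _demoRank1Sampling(dim=5, numSamples=100000):
    from scipy import eye, outer, cov
    u = randn(dim)
    a = -0.5
    # draw y ~ N(0, I) and z ~ N(0, 1), then form x = exp(a) * (y + z*u)
    samples = array([exp(a) * (randn(dim) + randn() * u) for _ in range(numSamples)])
    empirical = cov(samples.T)
    expected = exp(2 * a) * (eye(dim) + outer(u, u))
    print 'max abs deviation from expected covariance:', abs(empirical - expected).max()

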
def test():
    """ Rank-1 NES easily solves high-dimensional Rosenbrock functions. """
    from pybrain.rl.environments.functions.unimodal import RosenbrockFunction
    dim = 40
    f = RosenbrockFunction(dim)
    x0 = -ones(dim)
    l = Rank1NES(f, x0, verbose=True, verboseGaps=500)
    l.learn()


if __name__ == '__main__':
    test()