
Commit bccd339 ("python3")
1 parent: ad5058b

1 file changed: +178 -0 lines

code-python3/gradient_descent.py

from __future__ import division
from collections import Counter
from linear_algebra import distance, vector_subtract, scalar_multiply
import math, random

def sum_of_squares(v):
    """computes the sum of squared elements in v"""
    return sum(v_i ** 2 for v_i in v)

def difference_quotient(f, x, h):
    return (f(x + h) - f(x)) / h

def plot_estimated_derivative():

    def square(x):
        return x * x

    def derivative(x):
        return 2 * x

    derivative_estimate = lambda x: difference_quotient(square, x, h=0.00001)

    # plot to show they're basically the same
    # (map returns a lazy iterator in python 3, so materialize it with list
    #  before handing it to matplotlib)
    import matplotlib.pyplot as plt
    x = range(-10, 10)
    plt.plot(x, list(map(derivative, x)), 'rx')           # red  x
    plt.plot(x, list(map(derivative_estimate, x)), 'b+')  # blue +
    plt.show()                                            # purple *, hopefully

def partial_difference_quotient(f, v, i, h):

    # add h to just the i-th element of v
    w = [v_j + (h if j == i else 0)
         for j, v_j in enumerate(v)]

    return (f(w) - f(v)) / h

def estimate_gradient(f, v, h=0.00001):
    return [partial_difference_quotient(f, v, i, h)
            for i, _ in enumerate(v)]

def step(v, direction, step_size):
    """move step_size in the direction from v"""
    return [v_i + step_size * direction_i
            for v_i, direction_i in zip(v, direction)]

def sum_of_squares_gradient(v):
    return [2 * v_i for v_i in v]
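
# (illustrative check, not part of the original file) the numerical estimate
# should closely agree with the exact gradient of sum_of_squares, e.g.
#
#   estimate_gradient(sum_of_squares, [1, 2, 3])   # roughly [2, 4, 6]
#   sum_of_squares_gradient([1, 2, 3])             # exactly [2, 4, 6]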

def safe(f):
    """define a new function that wraps f and return it"""
    def safe_f(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except:
            return float('inf')    # this means "infinity" in Python
    return safe_f
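
# (illustrative, not part of the original file) safe turns exceptions into an
# infinitely bad value, which minimize_batch below relies on when a trial step
# blows up, e.g.
#
#   safe(lambda x: 1 / x)(2)   # 0.5
#   safe(lambda x: 1 / x)(0)   # float('inf') instead of ZeroDivisionError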


#
#
# minimize / maximize batch
#
#

def minimize_batch(target_fn, gradient_fn, theta_0, tolerance=0.000001):
    """use gradient descent to find theta that minimizes target function"""

    step_sizes = [100, 10, 1, 0.1, 0.01, 0.001, 0.0001, 0.00001]

    theta = theta_0                 # set theta to initial value
    target_fn = safe(target_fn)     # safe version of target_fn
    value = target_fn(theta)        # value we're minimizing

    while True:
        gradient = gradient_fn(theta)
        next_thetas = [step(theta, gradient, -step_size)
                       for step_size in step_sizes]

        # choose the one that minimizes the error function
        next_theta = min(next_thetas, key=target_fn)
        next_value = target_fn(next_theta)

        # stop if we're "converging"
        if abs(value - next_value) < tolerance:
            return theta
        else:
            theta, value = next_theta, next_value

def negate(f):
    """return a function that for any input x returns -f(x)"""
    return lambda *args, **kwargs: -f(*args, **kwargs)

def negate_all(f):
    """the same when f returns a list of numbers"""
    return lambda *args, **kwargs: [-y for y in f(*args, **kwargs)]

def maximize_batch(target_fn, gradient_fn, theta_0, tolerance=0.000001):
    return minimize_batch(negate(target_fn),
                          negate_all(gradient_fn),
                          theta_0,
                          tolerance)
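
# (illustrative, not part of the original file) maximizing f is the same as
# minimizing -f; for example f(v) = -sum_of_squares(v) peaks at the origin:
#
#   maximize_batch(lambda v: -sum_of_squares(v),
#                  lambda v: [-2 * v_i for v_i in v],
#                  [1, 2, 3])     # returns a theta close to [0, 0, 0]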

#
# minimize / maximize stochastic
#

def in_random_order(data):
    """generator that returns the elements of data in random order"""
    indexes = [i for i, _ in enumerate(data)]    # create a list of indexes
    random.shuffle(indexes)                      # shuffle them
    for i in indexes:                            # return the data in that order
        yield data[i]

def minimize_stochastic(target_fn, gradient_fn, x, y, theta_0, alpha_0=0.01):

    # materialize the pairs: in python 3, zip returns a one-shot iterator,
    # and we need to loop over the data on every pass
    data = list(zip(x, y))
    theta = theta_0                             # initial guess
    alpha = alpha_0                             # initial step size
    min_theta, min_value = None, float("inf")   # the minimum so far
    iterations_with_no_improvement = 0

    # if we ever go 100 iterations with no improvement, stop
    while iterations_with_no_improvement < 100:
        value = sum( target_fn(x_i, y_i, theta) for x_i, y_i in data )

        if value < min_value:
            # if we've found a new minimum, remember it
            # and go back to the original step size
            min_theta, min_value = theta, value
            iterations_with_no_improvement = 0
            alpha = alpha_0
        else:
            # otherwise we're not improving, so try shrinking the step size
            iterations_with_no_improvement += 1
            alpha *= 0.9

        # and take a gradient step for each of the data points
        for x_i, y_i in in_random_order(data):
            gradient_i = gradient_fn(x_i, y_i, theta)
            theta = vector_subtract(theta, scalar_multiply(alpha, gradient_i))

    return min_theta

def maximize_stochastic(target_fn, gradient_fn, x, y, theta_0, alpha_0=0.01):
    return minimize_stochastic(negate(target_fn),
                               negate_all(gradient_fn),
                               x, y, theta_0, alpha_0)

if __name__ == "__main__":

    print("using the gradient")

    v = [random.randint(-10,10) for i in range(3)]

    tolerance = 0.0000001

    while True:
        #print(v, sum_of_squares(v))
        gradient = sum_of_squares_gradient(v)   # compute the gradient at v
        next_v = step(v, gradient, -0.01)       # take a negative gradient step
        if distance(next_v, v) < tolerance:     # stop if we're converging
            break
        v = next_v                              # continue if we're not

    print("minimum v", v)
    print("minimum value", sum_of_squares(v))
    print()


    print("using minimize_batch")

    v = [random.randint(-10,10) for i in range(3)]

    v = minimize_batch(sum_of_squares, sum_of_squares_gradient, v)

    print("minimum v", v)
    print("minimum value", sum_of_squares(v))
