阻尼牛顿法(Python实现)
使用牛顿方向,分别使用Armijo准则和Wolfe准则来求步长
求解方程
\(f(x_1,x_2)=(x_1^2-2)^4+(x_1-2x_2)^2\)的极小值
import numpy as np
import tensorflow as tf
def fun(x): # 函数f(x)
# return 100 * (x[0] ** 2 - x[1]) ** 2 + (x[0] - 1) ** 2 测试用
return (x[0] - 2) ** 4 + (x[0] - 2 * x[1]) ** 2
def hessian(x): # 黑塞阵
return np.array([[12 * (x[0] - 2) ** 2 + 2, -4],
[-4, 8]], dtype=np.float32)
def gfun(x): # 梯度grad
x = tf.Variable(x)
with tf.GradientTape() as tape:
# tape.watch(x)
z = fun(x)
return tape.gradient(z, x).numpy()
# return np.array([4 * (x[0] - 2) ** 3 + 2 * (x[0] - 2*x[1]), -4 * (x[0] - 2*x[1])])
def dampnm_armijo(fun, gfun, hessian, x0): # 使用Armijo准则来求步长因子的阻尼牛顿法
maxk = 100
rho = .55
sigma = .4
k = 0
epsilon = 1e-5
while k < maxk:
gk = gfun(x0)
Gk = hessian(x0)
dk = -np.linalg.inv(Gk) @ gk
if np.linalg.norm(gk) < epsilon:
break
m = 0
mk = 0
while m < 20:
if fun(x0 + rho ** m * dk) < fun(x0) + sigma * rho ** m * gk.T @ dk:
mk = m
break
m += 1
x0 = x0 + rho ** mk * dk
k += 1
x = x0
val = fun(x)
return x, val, k
def dampnm_wolfe(fun, gfun, hessian, x0): # 使用Wolfe准则来求步长因子的阻尼牛顿法
maxk = 1000
k = 0
epsilon = 1e-5
while k < maxk:
gk = gfun(x0)
Gk = hessian(x0)
dk = -np.linalg.inv(Gk) @ gk
if np.linalg.norm(gk) < epsilon:
break
# m = 0
rho = 0.4
sigma = 0.5
a = 0
b = np.inf
alpha = 1
# j = 0
while True:
if not ((fun(x0) - fun(x0 + alpha * dk)) >= (-rho * alpha * gfun(x0).T @ dk)):
# j+=1
b = alpha
alpha = (a + alpha) / 2
continue
if not (gfun(x0 + alpha * dk).T @ dk >= sigma * gfun(x0).T @ dk):
a = alpha
alpha = np.min([2 * alpha, (alpha + b) / 2])
continue
break
x0 = x0 + alpha * dk
k += 1
x = x0
val = fun(x)
return x, val, k
if __name__ == '__main__':
x0 = np.array([[0.], [3.]])
x, val, k = dampnm_armijo(fun, gfun, hessian, x0) # Armijo准则
print('近似最优点:{}\n最优值:{}\n迭代次数:{}'.format(x, val.item(), k))
x, val, k = dampnm_wolfe(fun, gfun, hessian, x0) # wolfe准则
print('近似最优点:{}\n最优值:{}\n迭代次数:{}'.format(x, val.item(), k))
运行结果: