Quasi-Newton Methods (Python Implementation)
Quasi-Newton methods (BFGS and DFP) are implemented below, each paired with both the Armijo criterion and the Wolfe criterion for choosing the step size, to find the minimum of
\(f(x_1,x_2)=(x_1-2)^4+(x_1-2x_2)^2\).
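For reference, the hand-derived gradient hard-coded in `gfun` below is
\[
\nabla f(x_1,x_2)=\begin{pmatrix}4(x_1-2)^3+2(x_1-2x_2)\\ -4(x_1-2x_2)\end{pmatrix},
\]
and the exact minimizer is \(x^*=(2,1)^T\) with \(f(x^*)=0\): the first term vanishes at \(x_1=2\) and the second at \(x_1=2x_2\).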
import numpy as np
# import tensorflow as tf
def gfun(x):  # gradient of fun, derived by hand
    # The gradient could instead be computed with TensorFlow's autodiff:
    # x = tf.Variable(x, dtype=tf.float32)
    # with tf.GradientTape() as tape:
    #     tape.watch(x)
    #     z = fun(x)
    # return tape.gradient(z, x).numpy()
    return np.array([4 * (x[0] - 2) ** 3 + 2 * (x[0] - 2 * x[1]),
                     -4 * (x[0] - 2 * x[1])]).reshape(len(x), 1)
def fun(x):  # objective function
    return (x[0] - 2) ** 4 + (x[0] - 2 * x[1]) ** 2
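# A minimal sanity check of the hand-derived gradient (a sketch, not part of
# the original script): compare gfun against a central finite-difference
# estimate, e.g. np.allclose(gfun(x), gfun_fd(x)) at a few test points.
def gfun_fd(x, h=1e-6):
    g = np.zeros((len(x), 1))
    for i in range(len(x)):
        e = np.zeros((len(x), 1))
        e[i] = h
        g[i] = (fun(x + e) - fun(x - e)) / (2 * h)  # central difference
    return g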
def bfgs_armijo(x0):
    '''BFGS (rank-2) quasi-Newton method with Armijo line search'''
    maxk = 5000     # maximum number of iterations
    rho = .55       # backtracking ratio for the Armijo search
    sigma = .4      # sufficient-decrease parameter
    epsilon = 1e-5  # stopping tolerance on the gradient norm
k = 0
n = len(x0)
Hk = np.eye(n)
while k < maxk:
gk = gfun(x0)
if np.linalg.norm(gk) < epsilon:
break
dk = -Hk @ gk
        m = 0
        mk = 20  # fall back to the smallest trial step if the test never passes
        while m < 20:  # Armijo (inexact) line search: try steps rho**m
            if fun(x0 + rho ** m * dk) < fun(x0) + sigma * rho ** m * gk.T @ dk:
                mk = m
                break
            m += 1
x = x0 + rho ** mk * dk
sk = x - x0
yk = gfun(x) - gk
        if sk.T @ yk > 0:  # Armijo alone does not guarantee the curvature condition
            rk = 1.0 / (yk.T @ sk).item()
            V = np.eye(n) - rk * sk @ yk.T
            Hk = V @ Hk @ V.T + rk * sk @ sk.T  # BFGS update of the inverse Hessian approximation
k += 1
x0 = x
return x0, fun(x0), k
def bfgs_wolfe(x0):
    '''BFGS (rank-2) quasi-Newton method with Wolfe line search'''
maxk = 5000
epsilon = 1e-5
k = 0
n = len(x0)
Hk = np.eye(n)
while k < maxk:
gk = gfun(x0)
if np.linalg.norm(gk) < epsilon:
break
dk = -Hk @ gk
        rho = 0.4    # sufficient-decrease parameter (0 < rho < sigma < 1)
        sigma = 0.5  # curvature parameter
        a = 0        # lower end of the bracketing interval for the step
        b = np.inf   # upper end of the bracketing interval
        alpha = 1    # initial trial step
        while True:  # Wolfe (inexact) line search by interval bisection
            # sufficient decrease: f(x0 + alpha*dk) <= f(x0) + rho*alpha*gk.T@dk
            if not (fun(x0) - fun(x0 + alpha * dk) >= -rho * alpha * gk.T @ dk):
                b = alpha
                alpha = (a + alpha) / 2  # shrink the step
                continue
            # curvature: g(x0 + alpha*dk).T@dk >= sigma*gk.T@dk
            if not (gfun(x0 + alpha * dk).T @ dk >= sigma * gk.T @ dk):
                a = alpha
                alpha = np.min([2 * alpha, (alpha + b) / 2])  # enlarge the step
                continue
            break
x = x0 + alpha * dk
sk = x - x0
yk = gfun(x) - gk
        rk = 1.0 / (yk.T @ sk).item()  # Wolfe's curvature condition ensures yk.T @ sk > 0
        V = np.eye(n) - rk * sk @ yk.T
        Hk = V @ Hk @ V.T + rk * sk @ sk.T  # BFGS update of the inverse Hessian approximation
k += 1
x0 = x
return x0, fun(x0), k
def dfp_wolfe(x0):
    '''DFP (rank-2) quasi-Newton method with Wolfe line search.
    The curvature condition g(x0+alpha*dk).T@dk >= sigma*gk.T@dk gives
    yk.T@sk >= alpha*(sigma - 1)*gk.T@dk > 0, so the positive definiteness
    of the matrix sequence Hk is preserved without an extra safeguard.
    '''
maxk = 5000
epsilon = 1e-5
k = 0
n = len(x0)
    Hk = np.eye(n)  # initialize Hk as the identity matrix
while k < maxk:
gk = gfun(x0)
        if np.linalg.norm(gk) < epsilon:  # stopping criterion
break
dk = -Hk @ gk
        rho = 0.4
        sigma = 0.5
        a = 0
        b = np.inf
        alpha = 1
        while True:  # Wolfe line search, as in bfgs_wolfe above
            if not (fun(x0) - fun(x0 + alpha * dk) >= -rho * alpha * gk.T @ dk):
                b = alpha
                alpha = (a + alpha) / 2
                continue
            if not (gfun(x0 + alpha * dk).T @ dk >= sigma * gk.T @ dk):
                a = alpha
                alpha = np.min([2 * alpha, (alpha + b) / 2])
                continue
            break
x = x0 + alpha * dk
sk = x - x0
yk = gfun(x) - gk
        Hk = Hk - (Hk @ yk @ yk.T @ Hk) / (yk.T @ Hk @ yk) + (sk @ sk.T) / (sk.T @ yk)  # DFP update of the inverse Hessian approximation
k += 1
x0 = x
return x0, fun(x0), k
def dfp_armijo(x0):
    '''DFP (rank-2) quasi-Newton method with Armijo line search.
    With exact line search the condition sk.T@yk > 0, which keeps the matrix
    sequence Hk positive definite, holds automatically; the Armijo criterion
    alone does not guarantee it, so the update is skipped when it fails.
    '''
maxk = 5000
rho = .55
sigma = .4
epsilon = 1e-5
k = 0
n = len(x0)
Hk = np.eye(n)
while k < maxk:
gk = gfun(x0)
if np.linalg.norm(gk) < epsilon:
break
dk = -Hk @ gk
        m = 0
        mk = 20  # fall back to the smallest trial step if the test never passes
        while m < 20:  # Armijo (inexact) line search: try steps rho**m
            if fun(x0 + rho ** m * dk) < fun(x0) + sigma * rho ** m * gk.T @ dk:
                mk = m
                break
            m += 1
x = x0 + rho ** mk * dk
sk = x - x0
yk = gfun(x) - gk
        if sk.T @ yk > 0:  # only update when the curvature condition holds
Hk = Hk - (Hk @ yk @ yk.T @ Hk) / (yk.T @ Hk @ yk) + (sk @ sk.T) / (sk.T @ yk)
k += 1
x0 = x
return x0, fun(x0), k
if __name__ == '__main__':
    x0 = np.array([[0], [0]])
    x0, val, k = bfgs_armijo(x0)  # BFGS with the Armijo criterion
    print(f'Approximate minimizer:\n{x0}\nIterations: {k}\nObjective value: {val.item()}')
    x0 = np.array([[0], [0]])
    x0, val, k = bfgs_wolfe(x0)  # BFGS with the Wolfe criterion
    print(f'Approximate minimizer:\n{x0}\nIterations: {k}\nObjective value: {val.item()}')
    x0 = np.array([[0], [0]])
    x0, val, k = dfp_armijo(x0)  # DFP with the Armijo criterion
    print(f'Approximate minimizer:\n{x0}\nIterations: {k}\nObjective value: {val.item()}')
    x0 = np.array([[0], [0]])
    x0, val, k = dfp_wolfe(x0)  # DFP with the Wolfe criterion
    print(f'Approximate minimizer:\n{x0}\nIterations: {k}\nObjective value: {val.item()}')
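    # Optional cross-check (a sketch, assuming SciPy is installed): SciPy's
    # built-in BFGS should reach roughly the same minimizer x* = (2, 1).
    from scipy.optimize import minimize
    res = minimize(lambda x: (x[0] - 2) ** 4 + (x[0] - 2 * x[1]) ** 2,
                   np.zeros(2), method='BFGS')
    print(f'SciPy BFGS: x = {res.x}, f(x) = {res.fun}, iterations = {res.nit}')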