Hessian matrix estimator
andyfaff
#pragma rtGlobals=3 // Use modern global access method and strict wave access.
//See the following link for central difference problems
//http://www.holoborodko.com/pavel/numerical-methods/numerical-derivative…
//the following link says why an EPS of 1e-5 should be used. Basically, if the accuracy for the central difference algorithm is A, then the step size, H, should be A^0.3333.
//Thus, if you calculate in DP, the accuracy is on the order of 1e-15. The cube root of 1e-15 is 1e-5.
//http://scicomp.stackexchange.com/questions/1875/adaptive-h-for-gradient…
CONSTANT EPS = 1e-5
Function Hessian(fun, vec, oi)
//this function calculates the second order mixed derivatives of the scalar function fun, around the locus values contained in the vec array
//
//Function fun(vec, oi)
// Wave vec
// struct otherInfo, &oi
// //do some calculation
// return val
// End
//
// vec is an array that specifies the locus around which the derivative matrix is to be estimated
//
// oi is a structure containing other information to pass to the fun function. You can alter this structure to make sense for your function.
//
// One of it main uses is in estimating the Hessian matrix for a least squares problem (the inverse of which gives the covariance matrix).
//
//If you use this to calculate the covariance matrix then you have to follow these steps:
// 1) divide Hessian by 2.
// 2) Take the matrix inverse
// 3) Multiply by chi2/(number of fitted points - number of fitted parameters)
Funcref energy, fun
Wave vec
Struct otherinfo &oi
variable ii, jj, nvec, t0, t1, t2, t3
nvec = numpnts(vec)
make/n=(nvec, nvec)/d/o M_hessian
duplicate/free vec, scale, freevec, normal
redimension/d scale, freevec, normal
scale = 1/vec
normal = 1
for(ii = 0 ; ii < nvec ; ii += 1)
for(jj = ii ; jj < nvec ; jj+=1)
normal = 1
if(ii == jj)
normal[ii] = 1 - 2 * EPS
freevec = normal/scale
t0 = fun(freevec, oi)
normal[ii] = 1 - EPS
freevec = normal /scale
t1 = fun(freevec, oi)
normal[ii] = 1
freevec = normal /scale
t2 = fun(freevec, oi)
normal[ii] = 1 + EPS
freevec = normal /scale
t3 = fun(freevec, oi)
normal[ii] = 1 + 2 * EPS
freevec = normal /scale
t4 = fun(freevec, oi)
M_Hessian[ii][jj] = (-t0 + 16 * t1 - 30 * t2 + 16 * t3 - t4) / 12/EPS/EPS*scale[ii]*scale[ii]
else
//f -1,-1
normal[ii] = 1 * (1 - EPS)
normal[jj] = 1 * (1 - EPS)
freevec = normal / scale
t0 = fun(freevec, oi)
//f +1,+1
normal[ii] = 1 * (1 + EPS)
normal[jj] = 1 * (1 + EPS)
freevec = normal / scale
t1 = fun(freevec, oi)
//f +1,-1
normal[ii] = 1 * (1 + EPS)
normal[jj] = 1 * (1 - EPS)
freevec = normal / scale
t2 = fun(freevec, oi)
//f -1,+1
normal[ii] = 1 * (1 - EPS)
normal[jj] = 1 * (1 + EPS)
freevec = normal / scale
t3 = fun(freevec, oi)
M_hessian[ii][jj] = (t0 + t1 - t2 - t3)/4/EPS/EPS*scale[ii]*scale[jj]
M_hessian[jj][ii] = M_hessian[ii][jj]
// the following is a higher order calculation, which is more expensive, but may be worth while.
// //f 1,-2
// normal[ii] = 1 + EPS
// normal[jj] = 1 - 2 * EPS
// freevec = normal / scale
// t0 = fun(freevec, oi)
//
// //f +2, -1
// normal[ii] = 1 + 2 * EPS
// normal[jj] = 1- EPS
// freevec = normal / scale
// t1 = fun(freevec, oi)
//
// //f -2,+1
// normal[ii] = 1 - 2 * EPS
// normal[jj] = 1 + EPS
// freevec = normal / scale
// t2 = fun(freevec, oi)
//
// //f -1,+1
// normal[ii] = 1 - EPS
// normal[jj] = 1 + 2 * EPS
// freevec = normal / scale
// t3 = fun(freevec, oi)
//
// M_hessian[ii][jj] = -63 * (t0 + t1 + t2 + t3)
//
// //f -1,-2
// normal[ii] = 1 - EPS
// normal[jj] = 1 - 2 * EPS
// freevec = normal / scale
// t0 = fun(freevec, oi)
//
// //f -2, -1
// normal[ii] = 1 - 2 * EPS
// normal[jj] = 1- EPS
// freevec = normal / scale
// t1 = fun(freevec, oi)
//
// //f +1,+2
// normal[ii] = 1 + EPS
// normal[jj] = 1 + 2* EPS
// freevec = normal / scale
// t2 = fun(freevec, oi)
//
// //f+2,+1
// normal[ii] = 1 + 2 * EPS
// normal[jj] = 1 + EPS
// freevec = normal / scale
// t3 = fun(freevec, oi)
// M_hessian[ii][jj] += 63 * (t0 + t1 + t2 + t3)
//
// //f 2,-2
// normal[ii] = 1 + 2 * EPS
// normal[jj] = 1 - 2 * EPS
// freevec = normal / scale
// t0 = fun(freevec, oi)
//
// //f -2, 2
// normal[ii] = 1 - 2 * EPS
// normal[jj] = 1 + 2 * EPS
// freevec = normal / scale
// t1 = fun(freevec, oi)
//
// //f -2,-2
// normal[ii] = 1 - 2* EPS
// normal[jj] = 1 - 2* EPS
// freevec = normal / scale
// t2 = fun(freevec, oi)
//
// //f2,2
// normal[ii] = 1 + 2 * EPS
// normal[jj] = 1 +2* EPS
// freevec = normal / scale
// t3 = fun(freevec, oi)
// M_hessian[ii][jj] += 44 * (t0 + t1 - t2 - t3)
//
// //f -1,-1
// normal[ii] = 1 -1 * EPS
// normal[jj] = 1 - 1 * EPS
// freevec = normal / scale
// t0 = fun(freevec, oi)
//
// //f 1,1
// normal[ii] = 1 + EPS
// normal[jj] = 1 + EPS
// freevec = normal / scale
// t1 = fun(freevec, oi)
//
// //f 1,-1
// normal[ii] = 1 + EPS
// normal[jj] = 1 - EPS
// freevec = normal / scale
// t2 = fun(freevec, oi)
//
// //f-1,1
// normal[ii] = 1 -1 * EPS
// normal[jj] = 1 +1* EPS
// freevec = normal / scale
// t3 = fun(freevec, oi)
// M_hessian[ii][jj] += 74 * (t0 + t1 - t2 - t3)
//
// M_hessian[ii][jj] = M_hessian[ii][jj]/600/EPS/EPS*scale[ii]*scale[jj]
// M_hessian[jj][ii] = M_hessian[ii][jj]
endif
endfor
endfor
End
Function energy(vec, oi)
Wave vec
Struct otherinfo &oi
End
Structure otherinfo
Endstructure
Function rosen(w, oi)
Wave w
Struct otherinfo &oi
return (1-w[0])^2 + 105 * (w[1] - w[0]^2)^2
End
Function test(w)
Wave w
Struct otherinfo pop
Hessian(rosen, w, pop)
//should give
//M_hessian[0][0]= {842,-420}
//M_hessian[0][1]= {-420,210}
End
//See the following link for central difference problems
//http://www.holoborodko.com/pavel/numerical-methods/numerical-derivative…
//the following link says why an EPS of 1e-5 should be used. Basically, if the accuracy for the central difference algorithm is A, then the step size, H, should be A^0.3333.
//Thus, if you calculate in DP, the accuracy is on the order of 1e-15. The cube root of 1e-15 is 1e-5.
//http://scicomp.stackexchange.com/questions/1875/adaptive-h-for-gradient…
CONSTANT EPS = 1e-5
Function Hessian(fun, vec, oi)
//this function calculates the second order mixed derivatives of the scalar function fun, around the locus values contained in the vec array
//
//Function fun(vec, oi)
// Wave vec
// struct otherInfo, &oi
// //do some calculation
// return val
// End
//
// vec is an array that specifies the locus around which the derivative matrix is to be estimated
//
// oi is a structure containing other information to pass to the fun function. You can alter this structure to make sense for your function.
//
// One of it main uses is in estimating the Hessian matrix for a least squares problem (the inverse of which gives the covariance matrix).
//
//If you use this to calculate the covariance matrix then you have to follow these steps:
// 1) divide Hessian by 2.
// 2) Take the matrix inverse
// 3) Multiply by chi2/(number of fitted points - number of fitted parameters)
Funcref energy, fun
Wave vec
Struct otherinfo &oi
variable ii, jj, nvec, t0, t1, t2, t3
nvec = numpnts(vec)
make/n=(nvec, nvec)/d/o M_hessian
duplicate/free vec, scale, freevec, normal
redimension/d scale, freevec, normal
scale = 1/vec
normal = 1
for(ii = 0 ; ii < nvec ; ii += 1)
for(jj = ii ; jj < nvec ; jj+=1)
normal = 1
if(ii == jj)
normal[ii] = 1 - 2 * EPS
freevec = normal/scale
t0 = fun(freevec, oi)
normal[ii] = 1 - EPS
freevec = normal /scale
t1 = fun(freevec, oi)
normal[ii] = 1
freevec = normal /scale
t2 = fun(freevec, oi)
normal[ii] = 1 + EPS
freevec = normal /scale
t3 = fun(freevec, oi)
normal[ii] = 1 + 2 * EPS
freevec = normal /scale
t4 = fun(freevec, oi)
M_Hessian[ii][jj] = (-t0 + 16 * t1 - 30 * t2 + 16 * t3 - t4) / 12/EPS/EPS*scale[ii]*scale[ii]
else
//f -1,-1
normal[ii] = 1 * (1 - EPS)
normal[jj] = 1 * (1 - EPS)
freevec = normal / scale
t0 = fun(freevec, oi)
//f +1,+1
normal[ii] = 1 * (1 + EPS)
normal[jj] = 1 * (1 + EPS)
freevec = normal / scale
t1 = fun(freevec, oi)
//f +1,-1
normal[ii] = 1 * (1 + EPS)
normal[jj] = 1 * (1 - EPS)
freevec = normal / scale
t2 = fun(freevec, oi)
//f -1,+1
normal[ii] = 1 * (1 - EPS)
normal[jj] = 1 * (1 + EPS)
freevec = normal / scale
t3 = fun(freevec, oi)
M_hessian[ii][jj] = (t0 + t1 - t2 - t3)/4/EPS/EPS*scale[ii]*scale[jj]
M_hessian[jj][ii] = M_hessian[ii][jj]
// the following is a higher order calculation, which is more expensive, but may be worth while.
// //f 1,-2
// normal[ii] = 1 + EPS
// normal[jj] = 1 - 2 * EPS
// freevec = normal / scale
// t0 = fun(freevec, oi)
//
// //f +2, -1
// normal[ii] = 1 + 2 * EPS
// normal[jj] = 1- EPS
// freevec = normal / scale
// t1 = fun(freevec, oi)
//
// //f -2,+1
// normal[ii] = 1 - 2 * EPS
// normal[jj] = 1 + EPS
// freevec = normal / scale
// t2 = fun(freevec, oi)
//
// //f -1,+1
// normal[ii] = 1 - EPS
// normal[jj] = 1 + 2 * EPS
// freevec = normal / scale
// t3 = fun(freevec, oi)
//
// M_hessian[ii][jj] = -63 * (t0 + t1 + t2 + t3)
//
// //f -1,-2
// normal[ii] = 1 - EPS
// normal[jj] = 1 - 2 * EPS
// freevec = normal / scale
// t0 = fun(freevec, oi)
//
// //f -2, -1
// normal[ii] = 1 - 2 * EPS
// normal[jj] = 1- EPS
// freevec = normal / scale
// t1 = fun(freevec, oi)
//
// //f +1,+2
// normal[ii] = 1 + EPS
// normal[jj] = 1 + 2* EPS
// freevec = normal / scale
// t2 = fun(freevec, oi)
//
// //f+2,+1
// normal[ii] = 1 + 2 * EPS
// normal[jj] = 1 + EPS
// freevec = normal / scale
// t3 = fun(freevec, oi)
// M_hessian[ii][jj] += 63 * (t0 + t1 + t2 + t3)
//
// //f 2,-2
// normal[ii] = 1 + 2 * EPS
// normal[jj] = 1 - 2 * EPS
// freevec = normal / scale
// t0 = fun(freevec, oi)
//
// //f -2, 2
// normal[ii] = 1 - 2 * EPS
// normal[jj] = 1 + 2 * EPS
// freevec = normal / scale
// t1 = fun(freevec, oi)
//
// //f -2,-2
// normal[ii] = 1 - 2* EPS
// normal[jj] = 1 - 2* EPS
// freevec = normal / scale
// t2 = fun(freevec, oi)
//
// //f2,2
// normal[ii] = 1 + 2 * EPS
// normal[jj] = 1 +2* EPS
// freevec = normal / scale
// t3 = fun(freevec, oi)
// M_hessian[ii][jj] += 44 * (t0 + t1 - t2 - t3)
//
// //f -1,-1
// normal[ii] = 1 -1 * EPS
// normal[jj] = 1 - 1 * EPS
// freevec = normal / scale
// t0 = fun(freevec, oi)
//
// //f 1,1
// normal[ii] = 1 + EPS
// normal[jj] = 1 + EPS
// freevec = normal / scale
// t1 = fun(freevec, oi)
//
// //f 1,-1
// normal[ii] = 1 + EPS
// normal[jj] = 1 - EPS
// freevec = normal / scale
// t2 = fun(freevec, oi)
//
// //f-1,1
// normal[ii] = 1 -1 * EPS
// normal[jj] = 1 +1* EPS
// freevec = normal / scale
// t3 = fun(freevec, oi)
// M_hessian[ii][jj] += 74 * (t0 + t1 - t2 - t3)
//
// M_hessian[ii][jj] = M_hessian[ii][jj]/600/EPS/EPS*scale[ii]*scale[jj]
// M_hessian[jj][ii] = M_hessian[ii][jj]
endif
endfor
endfor
End
Function energy(vec, oi)
Wave vec
Struct otherinfo &oi
End
Structure otherinfo
Endstructure
Function rosen(w, oi)
Wave w
Struct otherinfo &oi
return (1-w[0])^2 + 105 * (w[1] - w[0]^2)^2
End
Function test(w)
Wave w
Struct otherinfo pop
Hessian(rosen, w, pop)
//should give
//M_hessian[0][0]= {842,-420}
//M_hessian[0][1]= {-420,210}
End
Forum
Support
Gallery
Igor Pro 9
Learn More
Igor XOP Toolkit
Learn More
Igor NIDAQ Tools MX
Learn More
Rather than hard-commenting out the higher order, perhaps this change would be useful ...
...
variable ho
if (ParamIsDefault(ho))
ho = 0
endif
if (ho)
.... do higher order calculation
endif
end
--
J. J. Weimer
Chemistry / Chemical & Materials Engineering, UAHuntsville
May 6, 2013 at 05:04 pm - Permalink