TooN 2.1
functions/derivatives.h
00001 #ifndef TOON_INCLUDE_DERIVATIVES_NUMERICAL_H
00002 #define TOON_INCLUDE_DERIVATIVES_NUMERICAL_H
00003 
00004 #include <TooN/TooN.h>
00005 #include <vector>
00006 #include <cmath>
00007 
00008 using namespace std;
00009 using namespace TooN;
00010 
00011 namespace TooN{
00012     namespace Internal{
00013         ///@internal
00014         ///@brief Implementation of Ridder's Extrapolation to zero.
00015         ///The function input starts from 1.
00016         ///@param f Functor to extrapolate to zero.
00017         ///@ingroup gInternal
00018         template<class F, class Precision> std::pair<Precision, Precision> extrapolate_to_zero(F& f)
00019         {
00020             using std::isfinite;
00021             using std::max;
00022             using std::isnan;
00023             //Use points at c^0, c^-1, ... to extrapolate towards zero.
00024             const Precision c = 1.1, t = 2;
00025 
00026             static const int Npts = 400;
00027 
00028             /* Neville's table is:  
00029             x_0      y_0    P_0                                 
00030                                   P_{01}                        
00031             x_1      y_1    P_1            P_{012}              
00032                                   P_{12}             P_{0123}   
00033             x_2      y_2    P_2            P_{123}              
00034                                   P_{23}                        
00035             x_3      y_3    P_3   
00036 
00037             In Matrix form, this is rearranged as:
00038             
00039             
00040             x_0      y_0    P_0                                 
00041                                                           
00042             x_1      y_1    P_1   P_{01}                 
00043                                                
00044             x_2      y_2    P_2   P_{12}  P_{012}               
00045                                                                        
00046             x_3      y_3    P_3   P_{23}  P_{123} P_{0123} 
00047 
00048             This is rewritten further as:
00049 
00050                   |                  0      1      2       3
00051              _____|___________________________________________________  
00052               0   | x_0      y_0    P^00                                
00053                   |                                               
00054               1   | x_1      y_1    P^10  P^11                   
00055                   |                                    
00056               2   | x_2      y_2    P^10  P^21    P^22                  
00057                   |                                                            
00058               3   | x_3      y_3    P^10  P^31    P^23    P^33     
00059 
00060             So, P^ij == P_{j-i...j}
00061 
00062             Finally, only the current and previous row are required. This reduces
00063             the memory cost from quadratic to linear.  The naming scheme is:
00064 
00065             P[i][j]    -> P_i[j]
00066             P[i-1][j]  -> P_i_1[j]
00067             */
00068 
00069             vector<Precision> P_i(1), P_i_1;
00070             
00071             Precision best_err = HUGE_VAL;
00072             Precision best_point=HUGE_VAL;
00073 
00074             //The first tranche of points might be bad.
00075             //Don't quit while no good points have ever happened.
00076             bool ever_ok=0;
00077 
00078             //Compute f(x) as x goes from 1 towards zero and extrapolate to 0
00079             Precision x=1;
00080             for(int i=0; i < Npts; i++)
00081             {
00082                 swap(P_i, P_i_1);
00083                 P_i.resize(i+2);
00084 
00085 
00086                 //P[i][0] = c^ -i;
00087                 P_i[0] = f(x);
00088 
00089                 x /= c;
00090                 
00091                 //Compute the extrapolations
00092                 //cj id c^j
00093                 Precision cj = 1;
00094                 bool better=0; //Did we get better?
00095                 bool any_ok=0;
00096                 for(int j=1; j <= i; j++)
00097                 {
00098                     cj *= c;
00099                     //We have (from above) n = i and k = n - j
00100                     //n-k = i - (n - j) = i - i + j = j
00101                     //Therefore c^(n-k) is just c^j
00102 
00103                     P_i[j] = (cj * P_i[j-1] - P_i_1[j-1]) / (cj - 1);
00104 
00105                     if(any_ok || isfinite(P_i[j]))
00106                         ever_ok = 1;
00107 
00108                     //Compute the difference between the current point (high order)
00109                     //and the corresponding lower order point at the current step size
00110                     Precision err1 = abs(P_i[j] - P_i[j-1]);
00111 
00112                     //Compute the difference between two consecutive points at the
00113                     //corresponding lower order point at the larger stepsize
00114                     Precision err2 = abs(P_i[j] - P_i_1[j-1]);
00115 
00116                     //The error is the larger of these.
00117                     Precision err = max(err1, err2);
00118 
00119                     if(err < best_err && isfinite(err))
00120                     {
00121                         best_err = err;
00122                         best_point = P_i[j];
00123                         better=1;
00124                     }
00125                 }
00126                 
00127                 using namespace std;
00128                 //If the highest order point got worse, or went off the rails, 
00129                 //and some good points have been seen, then break.
00130                 if(ever_ok && !better && i > 0 && (abs(P_i[i] - P_i_1[i-1]) > t * best_err|| isnan(P_i[i])))
00131                     break;
00132             }
00133 
00134             return std::make_pair(best_point, best_err);
00135         }
00136 
00137         ///@internal
00138         ///@brief Functor wrapper for computing finite differences along an axis.
00139         ///@ingroup gInternal
00140         template<class Functor, class  Precision, int Size, class Base> struct CentralDifferenceGradient
00141         {
00142             const Vector<Size, Precision, Base>& v; ///< Point about which to compute differences
00143             Vector<Size, Precision> x;      ///< Local copy of v
00144             const Functor&  f;                      ///< Functor to evaluate
00145             int i;                                  ///< Index to difference along
00146 
00147             CentralDifferenceGradient(const Vector<Size, Precision, Base>& v_, const Functor& f_)
00148             :v(v_),x(v),f(f_),i(0)
00149             {}
00150             
00151             ///Compute central difference.
00152             Precision operator()(Precision hh) 
00153             {
00154                 using std::max;
00155                 using std::abs;
00156 
00157                 //Make the step size be on the scale of the value.
00158                 double h = hh * max(abs(v[i]) * 1e-3, 1e-3);
00159 
00160                 x[i] = v[i] - h;
00161                 double f1 = f(x);
00162                 x[i] = v[i] + h;
00163                 double f2 = f(x);
00164                 x[i] = v[i];
00165 
00166                 double d =  (f2 - f1) / (2*h);
00167                 return d;
00168             }
00169         };
00170 
00171         ///@internal
00172         ///@brief Functor wrapper for computing finite difference second derivatives along an axis.
00173         ///@ingroup gInternal
00174         template<class Functor, class  Precision, int Size, class Base> struct CentralDifferenceSecond
00175         {
00176             const Vector<Size, Precision, Base>& v; ///< Point about which to compute differences
00177             Vector<Size, Precision> x;              ///< Local copy of v
00178             const Functor&  f;                      ///< Functor to evaluate
00179             int i;                                  ///< Index to difference along
00180             const Precision central;                ///<Central point.
00181 
00182             CentralDifferenceSecond(const Vector<Size, Precision, Base>& v_, const Functor& f_)
00183             :v(v_),x(v),f(f_),i(0),central(f(x))
00184             {}
00185             
00186             ///Compute central difference.
00187             Precision operator()(Precision hh) 
00188             {
00189                 using std::max;
00190                 using std::abs;
00191 
00192                 //Make the step size be on the scale of the value.
00193                 double h = hh * max(abs(v[i]) * 1e-3, 1e-3);
00194 
00195                 x[i] = v[i] - h;
00196                 double f1 = f(x);
00197 
00198                 x[i] = v[i] + h;
00199                 double f2 = f(x);
00200                 x[i] = v[i];
00201 
00202                 double d =  (f2 - 2*central + f1) / (h*h);
00203                 return d;
00204             }
00205         };
00206 
00207         ///@internal
00208         ///@brief Functor wrapper for computing finite difference cross derivatives along a pair of axes.
00209         ///@ingroup gInternal
00210         template<class Functor, class  Precision, int Size, class Base> struct CentralCrossDifferenceSecond
00211         {
00212             const Vector<Size, Precision, Base>& v; ///< Point about which to compute differences
00213             Vector<Size, Precision> x;              ///< Local copy of v
00214             const Functor&  f;                      ///< Functor to evaluate
00215             int i;                                  ///< Index to difference along
00216             int j;                                  ///< Index to difference along
00217 
00218             CentralCrossDifferenceSecond(const Vector<Size, Precision, Base>& v_, const Functor& f_)
00219             :v(v_),x(v),f(f_),i(0),j(0)
00220             {}
00221             
00222             ///Compute central difference.
00223             Precision operator()(Precision hh) 
00224             {
00225                 using std::max;
00226                 using std::abs;
00227 
00228                 //Make the step size be on the scale of the value.
00229                 double h = hh * max(abs(v[i]) * 1e-3, 1e-3);
00230 
00231                 x[i] = v[i] + h;
00232                 x[j] = v[j] + h;
00233                 double a = f(x);
00234 
00235                 x[i] = v[i] - h;
00236                 x[j] = v[j] + h;
00237                 double b = f(x);
00238 
00239                 x[i] = v[i] + h;
00240                 x[j] = v[j] - h;
00241                 double c = f(x);
00242 
00243 
00244                 x[i] = v[i] - h;
00245                 x[j] = v[j] - h;
00246                 double d = f(x);
00247 
00248                 return (a-b-c+d) / (4*h*h);
00249             }
00250         };
00251 
00252     }
00253 
00254 
00255     /** Extrapolate a derivative to zero using Ridder's Algorithm.
00256 
00257     Ridder's algorithm works by evaluating derivatives with smaller and smaller step
00258     sizes, fitting a polynomial and extrapolating its value to zero.
00259     
00260     This algorithm is generally more accurate and much more reliable, but much slower than
00261     using simple finite differences. It is robust to awkward functions and does not
00262     require careful tuning of the step size, furthermore it provides an estimate of the
00263     errors. This implementation has been tuned for accuracy instead of speed. For an
00264     estimate of the errors, see also numerical_gradient_with_errors(). In general it is
00265     useful to know the errors since some functions are remarkably hard to differentiate
00266     numerically.
00267 
00268     Neville's Algorithm can be used to find a point on a fitted polynomial at
00269     \f$h\f$. Taking
00270     some points \f$h_i\f$ and \f$ y_i = f(h_i)\f$, one can define a table of
00271     points on various polyomials:
00272     \f[
00273     \begin{array}{ccccccc}
00274     h_0    & y_0  & P_0 &        &         &          \\
00275            &      &     & P_{01} &         &          \\
00276     h_1    & y_1  & P_1 &        & P_{012} &          \\
00277            &      &     & P_{12} &         & P_{0123} \\
00278     h_2    & y_2  & P_2 &        & P_{123} &          \\
00279            &      &     & P_{23} &         &          \\
00280     h_3    & y_3  & P_3 &        &         &          \\
00281     \end{array}                            
00282     \f]
00283     where \f$P_{123}\f$ is the value of a polynomial fitted to datapoints 1, 2 and 3
00284     evaluated at \f$ h\f$. The initial values are simple to evaluate: \f$P_i = y_i =
00285     f(h_i)\f$. The remaining values are determined by the recurrance:
00286     \f{equation}
00287         P_{k\cdots n} = \frac{(h - h_n)P_{k \cdots n-1} + (h_k - h)P_{k+1 \cdots n}}{h_k - h_n}
00288     \f}
00289 
00290     For Ridder's algorithm, we define the extrapolation  point \f$ h=0 \f$ and the
00291     sequence of points to evaluate as \f$ h_i = c^{-i} \f$ where \f$ c \f$ is some
00292     constant a little greater than 1. 
00293     Substituting in to the above equation gives:
00294     \f[
00295         P_{k \cdots n} = \frac{c^{-k} P_{k+1 \cdots n} - P_{k \cdots n-1}}{c^{n - k} - 1}
00296     \f]
00297 
00298     To measure errors, when computing (for example) \f$P_{234}\f$, we compare the
00299     value to the lower order with the same step size \f$P_{34}\f$, and the lower
00300     order with a larger step size \f$P_{23}\f$. The error estimate is the largest of
00301     these. The  extrapolation with the smallest error is retained.
00302 
00303     Not only that, but since every value of P is used, every single subset of points
00304     is used for polynomial fitting. So, bad points for large and small \f$h\f$ do
00305     not spoil the answer.
00306 
00307     It is generally assumed that as \f$h\f$ decreases, the errors decrease, then
00308     increase again. Therefore, if the current step size did not yield any
00309     improvements on the best point so far, then we terminate when the highest order
00310     point is \f$ t \f$ times worse then the previous order point.
00311 
00312     The parameters are:
00313     - \f$ c = 1.1 \f$
00314     - \f$ t = 2 \f$
00315     - Max iterations = 400
00316 
00317     \section rRef  References                         
00318 
00319     - Ridders, C. J. F, 1982, Advances in Engineering Software, 4(2) 75--76
00320     - Press, Vetterling, Teukolsky, Flannery, Numerical, 1997, Numerical Recipies in C (2nd ed.), Chapter 5.7
00321 
00322     @param f Functor to differentiate
00323     @param x Point about which to differentiate.
00324     @ingroup gFunctions
00325     */
00326     template<class F, int S, class P, class B> Vector<S, P> numerical_gradient(const F& f, const Vector<S, P, B>& x)
00327     {
00328         using namespace Internal;
00329         Vector<S> grad(x.size());
00330 
00331         CentralDifferenceGradient<F, P, S, B> d(x, f);
00332 
00333         for(int i=0; i < x.size(); i++)
00334         {
00335             d.i = i;
00336             grad[i] = extrapolate_to_zero<CentralDifferenceGradient<F, P, S, B>, P>(d).first;
00337         }
00338 
00339         return grad;
00340     }
00341     
00342     ///Compute numerical gradients with errors.
00343     ///See numerical_gradient().
00344     ///Gradients are returned in the first row of the returned matrix.
00345     ///Errors are returned in the second row. The errors have not been scaled, so they are
00346     ///in the same range as the gradients.
00347     ///@param f Functor to differentiate
00348     ///@param x Point about which to differentiate.
00349     ///@ingroup gFunctions 
00350     template<class F, int S, class P, class B> Matrix<S,2,P> numerical_gradient_with_errors(const F& f, const Vector<S, P, B>& x)
00351     {
00352         using namespace Internal;
00353         Matrix<S,2> g(x.size(), 2);
00354 
00355         CentralDifferenceGradient<F, P, S, B> d(x, f);
00356 
00357         for(int i=0; i < x.size(); i++)
00358         {
00359             d.i = i;
00360             pair<double, double> r= extrapolate_to_zero<CentralDifferenceGradient<F, P, S, B>, P>(d);
00361             g[i][0] = r.first;
00362             g[i][1] = r.second;
00363         }
00364 
00365         return g;
00366     }
00367 
00368     
00369     ///Compute the numerical Hessian using central differences and Ridder's method:
00370     ///\f[
00371     /// \frac{\partial^2 f}{\partial x^2} \approx \frac{f(x-h) - 2f(x) + f(x+h)}{h^2}
00372     ///\f]
00373     ///\f[
00374     /// \frac{\partial^2 f}{\partial x\partial y} \approx \frac{f(x+h, y+h) - f(x-h,y+h) - f(x+h, y-h) + f(x-h, y-h)}{4h^2}
00375     ///\f]
00376     ///See numerical_gradient().
00377     ///@param f Functor to double-differentiate
00378     ///@param x Point about which to double-differentiate.
00379     ///@ingroup gFunctions 
00380     template<class F, int S, class P, class B> pair<Matrix<S, S, P>, Matrix<S, S, P> > numerical_hessian_with_errors(const F& f, const Vector<S, P, B>& x)
00381     {
00382         using namespace Internal;
00383         Matrix<S> hess(x.size(), x.size());
00384         Matrix<S> errors(x.size(), x.size());
00385 
00386         CentralDifferenceSecond<F, P, S, B> curv(x, f);
00387         CentralCrossDifferenceSecond<F, P, S, B> cross(x, f);
00388 
00389         //Perform the cross differencing.
00390 
00391         for(int r=0; r < x.size(); r++)
00392             for(int c=r+1; c < x.size(); c++)
00393             {
00394                 cross.i = r;
00395                 cross.j = c;
00396                 pair<double, double> e =  extrapolate_to_zero<CentralCrossDifferenceSecond<F, P, S, B>, P >(cross);
00397                 hess[r][c] = hess[c][r] = e.first;
00398                 errors[r][c] = errors[c][r] = e.second;
00399             }
00400 
00401         for(int i=0; i < x.size(); i++)
00402         {
00403             curv.i = i;
00404             pair<double, double> e = extrapolate_to_zero<CentralDifferenceSecond<F, P, S, B>, P>(curv);
00405             hess[i][i] = e.first;
00406             errors[i][i] = e.second;
00407         }
00408 
00409         return make_pair(hess, errors);
00410     }
00411     
00412     ///Compute the numerical Hessian and errors. The Hessian is returned as the first
00413     ///element, and the errors as the second.
00414     ///See numerical_hessian().
00415     ///@param f Functor to double-differentiate
00416     ///@param x Point about which to double-differentiate.
00417     ///@ingroup gFunctions 
00418     template<class F, int S, class P, class B> Matrix<S, S, P> numerical_hessian(const F& f, const Vector<S, P, B>& x)
00419     {
00420         using namespace Internal;
00421         Matrix<S> hess(x.size(), x.size());
00422 
00423         CentralDifferenceSecond<F, P, S, B> curv(x, f);
00424         CentralCrossDifferenceSecond<F, P, S, B> cross(x, f);
00425 
00426         //Perform the cross differencing.
00427         for(int r=0; r < x.size(); r++)
00428             for(int c=r+1; c < x.size(); c++)
00429             {
00430                 cross.i = r;
00431                 cross.j = c;
00432                 pair<double, double> e =  extrapolate_to_zero<CentralCrossDifferenceSecond<F, P, S, B>, P >(cross);
00433                 hess[r][c] = hess[c][r] = e.first;
00434             }
00435 
00436         for(int i=0; i < x.size(); i++)
00437         {
00438             curv.i = i;
00439             pair<double, double> e = extrapolate_to_zero<CentralDifferenceSecond<F, P, S, B>, P>(curv);
00440             hess[i][i] = e.first;
00441         }
00442 
00443         return hess;
00444     }
00445 }
00446 
00447 #endif
00448