BFGSOpt.h
//
// Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
#include <math.h>
#include <RDGeneral/Invariant.h>
#include <cstring>
#include <algorithm>

namespace BFGSOpt {
const double FUNCTOL =
    1e-4;  //!< Default tolerance for function convergence in the minimizer
const double MOVETOL =
    1e-7;  //!< Default tolerance for x changes in the minimizer
const int MAXITS = 200;   //!< Default maximum number of iterations
const double EPS = 3e-8;  //!< Default gradient tolerance in the minimizer
const double TOLX =
    4. * EPS;  //!< Default direction vector tolerance in the minimizer
const double MAXSTEP = 100.0;  //!< Default maximum step size in the minimizer

//! Do a Quasi-Newton minimization along a line.
/*!
  See Numerical Recipes in C, Section 9.7 for a description of the algorithm.

  \param dim the dimensionality of the space.
  \param oldPt the current position, as an array.
  \param oldVal the current function value.
  \param grad the value of the function gradient at oldPt
  \param dir the minimization direction
  \param newPt used to return the final position
  \param newVal used to return the final function value
  \param func the function to minimize
  \param maxStep the maximum allowable step size
  \param resCode used to return the results of the search.

  Possible values for resCode on return are:
   - 0: success
   - 1: the step size got too small; this probably indicates success.
   - -1: the direction is bad (orthogonal to the gradient)
*/
template <typename EnergyFunctor>
void linearSearch(unsigned int dim, double *oldPt, double oldVal, double *grad,
                  double *dir, double *newPt, double &newVal,
                  EnergyFunctor func, double maxStep, int &resCode) {
  PRECONDITION(oldPt, "bad input array");
  PRECONDITION(grad, "bad input array");
  PRECONDITION(dir, "bad input array");
  PRECONDITION(newPt, "bad input array");

  const unsigned int MAX_ITER_LINEAR_SEARCH = 1000;
  double sum = 0.0, slope = 0.0, test = 0.0, lambda = 0.0;
  double lambda2 = 0.0, lambdaMin = 0.0, tmpLambda = 0.0, val2 = 0.0;

  resCode = -1;

  // get the length of the direction vector:
  sum = 0.0;
  for (unsigned int i = 0; i < dim; i++) sum += dir[i] * dir[i];
  sum = sqrt(sum);

  // rescale if we're trying to move too far:
  if (sum > maxStep) {
    for (unsigned int i = 0; i < dim; i++) dir[i] *= maxStep / sum;
  }

  // make sure our direction has at least some component along -grad:
  slope = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    slope += dir[i] * grad[i];
  }
  if (slope >= 0.0) {
    return;
  }

  // find the largest relative component of the direction vector:
  test = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    double temp = fabs(dir[i]) / std::max(fabs(oldPt[i]), 1.0);
    if (temp > test) test = temp;
  }

  lambdaMin = MOVETOL / test;
  lambda = 1.0;
  unsigned int it = 0;
  while (it < MAX_ITER_LINEAR_SEARCH) {
    if (lambda < lambdaMin) {
      // the position change is too small
      resCode = 1;
      break;
    }
    for (unsigned int i = 0; i < dim; i++) {
      newPt[i] = oldPt[i] + lambda * dir[i];
    }
    newVal = func(newPt);

    if (newVal - oldVal <= FUNCTOL * lambda * slope) {
      // we're converged on the function:
      resCode = 0;
      return;
    }
    // if we made it this far, we need to backtrack:
    if (it == 0) {
      // first backtrack: minimize a quadratic model of the function along
      // the line:
      tmpLambda = -slope / (2.0 * (newVal - oldVal - slope));
    } else {
      // later backtracks: minimize a cubic model built from the two most
      // recent trial points:
      double rhs1 = newVal - oldVal - lambda * slope;
      double rhs2 = val2 - oldVal - lambda2 * slope;
      double a = (rhs1 / (lambda * lambda) - rhs2 / (lambda2 * lambda2)) /
                 (lambda - lambda2);
      double b = (-lambda2 * rhs1 / (lambda * lambda) +
                  lambda * rhs2 / (lambda2 * lambda2)) /
                 (lambda - lambda2);
      if (a == 0.0) {
        tmpLambda = -slope / (2.0 * b);
      } else {
        double disc = b * b - 3 * a * slope;
        if (disc < 0.0) {
          tmpLambda = 0.5 * lambda;
        } else if (b <= 0.0) {
          tmpLambda = (-b + sqrt(disc)) / (3.0 * a);
        } else {
          tmpLambda = -slope / (b + sqrt(disc));
        }
      }
      // don't let the step grow beyond half the previous step:
      if (tmpLambda > 0.5 * lambda) {
        tmpLambda = 0.5 * lambda;
      }
    }
    lambda2 = lambda;
    val2 = newVal;
    // don't let lambda shrink by more than a factor of ten:
    lambda = std::max(tmpLambda, 0.1 * lambda);
    ++it;
  }
  // no acceptable step was found; return the starting position:
  for (unsigned int i = 0; i < dim; i++) {
    newPt[i] = oldPt[i];
  }
}
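
// Example (a minimal sketch, not part of the original header): a call to
// linearSearch for f(x) = x * x starting from x = 3 along the
// steepest-descent direction -grad. The functor and the values below are
// hypothetical.
//
//   auto func = [](double *pt) { return pt[0] * pt[0]; };
//   double oldPt[1] = {3.0};
//   double grad[1] = {6.0};  // f'(3) = 2 * 3
//   double dir[1] = {-6.0};  // -grad
//   double newPt[1];
//   double newVal;
//   int resCode;
//   BFGSOpt::linearSearch(1u, oldPt, 9.0, grad, dir, newPt, newVal, func,
//                         100.0, resCode);
//   // the search backtracks once and accepts the step to newPt[0] == 0,
//   // the minimum of f, with resCode == 0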

#define CLEANUP()        \
  {                      \
    delete[] grad;       \
    delete[] dGrad;      \
    delete[] hessDGrad;  \
    delete[] newPos;     \
    delete[] xi;         \
    delete[] invHessian; \
  }
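
// Aside (a sketch of an alternative, not what this file does): the manual
// new[] allocations paired with CLEANUP() in minimize() could also be
// handled with RAII, e.g.
//
//   #include <memory>
//   std::unique_ptr<double[]> grad(new double[dim]);
//   // ... pass grad.get() where a raw double* is needed ...
//
// which releases the buffers on every return path without a macro.
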
//! Do a BFGS minimization of a function.
/*!
  See Numerical Recipes in C, Section 10.7 for a description of the algorithm.

  \param dim the dimensionality of the space.
  \param pos the starting position, as an array.
  \param gradTol tolerance for gradient convergence
  \param numIters used to return the number of iterations required
  \param funcVal used to return the final function value
  \param func the function to minimize
  \param gradFunc calculates the gradient of func
  \param funcTol tolerance for changes in the function value for convergence
    (currently unused)
  \param maxIts maximum number of iterations allowed

  \return a flag indicating success (or type of failure). Possible values are:
   - 0: success
   - 1: too many iterations were required
*/
template <typename EnergyFunctor, typename GradientFunctor>
int minimize(unsigned int dim, double *pos, double gradTol,
             unsigned int &numIters, double &funcVal, EnergyFunctor func,
             GradientFunctor gradFunc, double funcTol = TOLX,
             unsigned int maxIts = MAXITS) {
  RDUNUSED_PARAM(funcTol);
  PRECONDITION(pos, "bad input array");
  PRECONDITION(gradTol > 0, "bad tolerance");

  double sum, maxStep, fp;

  double *grad, *dGrad, *hessDGrad;
  double *newPos, *xi;
  double *invHessian;

  grad = new double[dim];
  dGrad = new double[dim];
  hessDGrad = new double[dim];
  newPos = new double[dim];
  xi = new double[dim];
  invHessian = new double[dim * dim];

  // evaluate the function and gradient at our current position:
  fp = func(pos);
  gradFunc(pos, grad);

  sum = 0.0;
  memset(invHessian, 0, dim * dim * sizeof(double));
  for (unsigned int i = 0; i < dim; i++) {
    unsigned int itab = i * dim;
    // initialize the inverse hessian to the identity:
    invHessian[itab + i] = 1.0;
    // the first line dir is -grad:
    xi[i] = -grad[i];
    sum += pos[i] * pos[i];
  }
  // pick a max step size:
  maxStep = MAXSTEP * std::max(sqrt(sum), static_cast<double>(dim));

  for (unsigned int iter = 1; iter <= maxIts; iter++) {
    numIters = iter;
    int status;

    // do the line search:
    linearSearch(dim, pos, fp, grad, xi, newPos, funcVal, func, maxStep,
                 status);
    CHECK_INVARIANT(status >= 0, "bad direction in linearSearch");

    // save the function value for the next search:
    fp = funcVal;

    // set the direction of this line and save the gradient:
    double test = 0.0;
    for (unsigned int i = 0; i < dim; i++) {
      xi[i] = newPos[i] - pos[i];
      pos[i] = newPos[i];
      double temp = fabs(xi[i]) / std::max(fabs(pos[i]), 1.0);
      if (temp > test) test = temp;
      dGrad[i] = grad[i];
    }
    // converged if the largest relative position change is below TOLX:
    if (test < TOLX) {
      CLEANUP();
      return 0;
    }

    // update the gradient:
    double gradScale = gradFunc(pos, grad);

    // is the gradient converged?
    test = 0.0;
    double term = std::max(funcVal * gradScale, 1.0);
    for (unsigned int i = 0; i < dim; i++) {
      double temp = fabs(grad[i]) * std::max(fabs(pos[i]), 1.0);
      test = std::max(test, temp);
      dGrad[i] = grad[i] - dGrad[i];
    }
    test /= term;
    if (test < gradTol) {
      CLEANUP();
      return 0;
    }

    // compute hessian*dGrad and the scalars needed for the BFGS update:
    double fac = 0, fae = 0, sumDGrad = 0, sumXi = 0;
    for (unsigned int i = 0; i < dim; i++) {
#if 0
      // straightforward reference version:
      unsigned int itab = i * dim;
      hessDGrad[i] = 0.0;
      for (unsigned int j = 0; j < dim; j++) {
        hessDGrad[i] += invHessian[itab + j] * dGrad[j];
      }
#else
      // pointer-walking version of the same loop:
      double *ivh = &(invHessian[i * dim]);
      double &hdgradi = hessDGrad[i];
      double *dgj = dGrad;
      hdgradi = 0.0;
      for (unsigned int j = 0; j < dim; ++j, ++ivh, ++dgj) {
        hdgradi += *ivh * *dgj;
      }
#endif
      fac += dGrad[i] * xi[i];
      fae += dGrad[i] * hessDGrad[i];
      sumDGrad += dGrad[i] * dGrad[i];
      sumXi += xi[i] * xi[i];
    }
    // apply the BFGS update to the inverse hessian,
    //   H += (s s^T)/(s.y) - (H y)(H y)^T/(y.H y) + (y.H y) u u^T,
    // with s = xi (the step), y = dGrad (the gradient change), and
    // u = s/(s.y) - (H y)/(y.H y); skip the update unless s.y (= fac) is
    // sufficiently positive:
    if (fac > sqrt(EPS * sumDGrad * sumXi)) {
      fac = 1.0 / fac;
      double fad = 1.0 / fae;
      // dGrad is re-used here to hold the vector u:
      for (unsigned int i = 0; i < dim; i++) {
        dGrad[i] = fac * xi[i] - fad * hessDGrad[i];
      }

      for (unsigned int i = 0; i < dim; i++) {
        unsigned int itab = i * dim;
#if 0
        // straightforward reference version:
        for (unsigned int j = i; j < dim; j++) {
          invHessian[itab + j] += fac * xi[i] * xi[j] -
                                  fad * hessDGrad[i] * hessDGrad[j] +
                                  fae * dGrad[i] * dGrad[j];
          invHessian[j * dim + i] = invHessian[itab + j];
        }
#else
        // pointer-walking version of the same update:
        double pxi = fac * xi[i], hdgi = fad * hessDGrad[i],
               dgi = fae * dGrad[i];
        double *pxj = &(xi[i]), *hdgj = &(hessDGrad[i]), *dgj = &(dGrad[i]);
        for (unsigned int j = i; j < dim; ++j, ++pxj, ++hdgj, ++dgj) {
          invHessian[itab + j] += pxi * *pxj - hdgi * *hdgj + dgi * *dgj;
          invHessian[j * dim + i] = invHessian[itab + j];
        }
#endif
      }
    }
    // generate the next direction to move: xi = -invHessian * grad
    for (unsigned int i = 0; i < dim; i++) {
      unsigned int itab = i * dim;
      xi[i] = 0.0;
#if 0
      for (unsigned int j = 0; j < dim; j++) {
        xi[i] -= invHessian[itab + j] * grad[j];
      }
#else
      double &pxi = xi[i], *ivh = &(invHessian[itab]), *gj = grad;
      for (unsigned int j = 0; j < dim; ++j, ++ivh, ++gj) {
        pxi -= *ivh * *gj;
      }
#endif
    }
  }
  CLEANUP();
  return 1;
}
}  // namespace BFGSOpt
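
// Example (a minimal sketch, not part of the original header): minimizing
// f(x, y) = (x - 1)^2 + (y + 2)^2 with BFGSOpt::minimize. The functors and
// values below are hypothetical; note that gradFunc must fill the gradient
// array and return a double, which minimize uses to scale the gradient
// convergence test (1.0 is the natural choice for an unscaled gradient).
//
//   auto func = [](double *p) {
//     return (p[0] - 1.0) * (p[0] - 1.0) + (p[1] + 2.0) * (p[1] + 2.0);
//   };
//   auto gradFunc = [](double *p, double *g) -> double {
//     g[0] = 2.0 * (p[0] - 1.0);
//     g[1] = 2.0 * (p[1] + 2.0);
//     return 1.0;
//   };
//   double pos[2] = {0.0, 0.0};
//   unsigned int numIters = 0;
//   double funcVal = 0.0;
//   int res = BFGSOpt::minimize(2u, pos, 1e-4, numIters, funcVal, func,
//                               gradFunc);
//   // res == 0 on success; pos ends up near (1, -2) with funcVal near 0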