RDKit
Open-source cheminformatics and machine learning.
MolOps.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2001-2012 Greg Landrum and Rational Discovery LLC
3 // Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc.
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #ifndef _RD_MOL_OPS_H_
12 #define _RD_MOL_OPS_H_
13 
14 #include <vector>
15 #include <map>
16 #include <list>
17 #include <boost/smart_ptr.hpp>
18 #include <boost/dynamic_bitset.hpp>
19 #include <RDGeneral/types.h>
20 
21 extern const int ci_LOCAL_INF;
22 namespace RDKit {
23 class ROMol;
24 class RWMol;
25 class Atom;
26 class Bond;
27 typedef std::vector<double> INVAR_VECT;
28 typedef INVAR_VECT::iterator INVAR_VECT_I;
29 typedef INVAR_VECT::const_iterator INVAR_VECT_CI;
30 
31 //! \brief Groups a variety of molecular query and transformation operations.
32 namespace MolOps {
33 
34 //! return the number of electrons available on an atom to donate for
35 //! aromaticity
36 /*!
37  The result is determined using the default valency, number of lone pairs,
38  number of bonds and the formal charge. Note that the atom may not donate
39  all of these electrons to a ring for aromaticity (also used in Conjugation
40  and hybridization code).
41 
42  \param at the atom of interest
43 
44  \return the number of electrons
45 */
46 int countAtomElec(const Atom *at);
47 
48 //! sums up all atomic formal charges and returns the result
49 int getFormalCharge(const ROMol &mol);
50 
51 //! returns whether or not the given Atom is involved in a conjugated bond
52 bool atomHasConjugatedBond(const Atom *at);
53 
54 //! find fragments (disconnected components of the molecular graph)
55 /*!
56 
57  \param mol the molecule of interest
58  \param mapping used to return the mapping of Atoms->fragments.
59  On return \c mapping will be <tt>mol.getNumAtoms()</tt> long
60  and will contain the fragment assignment for each Atom
61 
62  \return the number of fragments found.
63 
64 */
65 unsigned int getMolFrags(const ROMol &mol, std::vector<int> &mapping);
66 //! find fragments (disconnected components of the molecular graph)
67 /*!
68 
69  \param mol the molecule of interest
70  \param frags used to return the Atoms in each fragment
71  On return \c frags will be \c numFrags long, and each entry
72  will contain the indices of the Atoms in that fragment.
73 
74  \return the number of fragments found.
75 
76 */
77 unsigned int getMolFrags(const ROMol &mol,
78  std::vector<std::vector<int> > &frags);
79 
80 //! splits a molecule into its component fragments
81 //! (disconnected components of the molecular graph)
82 /*!
83 
84  \param mol the molecule of interest
85  \param sanitizeFrags toggles sanitization of the fragments after
86  they are built
87  \param frags used to return the mapping of Atoms->fragments.
88  if provided, \c frags will be <tt>mol.getNumAtoms()</tt> long
89  on return and will contain the fragment assignment for each Atom
90  \param fragsMolAtomMapping used to return the Atoms in each fragment
91  On return \c fragsMolAtomMapping will be \c numFrags long, and each entry
92  will contain the indices of the Atoms in that fragment.
93  \param copyConformers toggles copying conformers of the fragments after
94  they are built
95  \return a vector of the fragments as smart pointers to ROMols
96 
97 */
98 std::vector<boost::shared_ptr<ROMol> > getMolFrags(
99  const ROMol &mol, bool sanitizeFrags = true, std::vector<int> *frags = 0,
100  std::vector<std::vector<int> > *fragsMolAtomMapping = 0,
101  bool copyConformers = true);
102 
103 //! splits a molecule into pieces based on labels assigned using a query
104 /*!
105 
106  \param mol the molecule of interest
107  \param query the query used to "label" the molecule for fragmentation
108  \param sanitizeFrags toggles sanitization of the fragments after
109  they are built
110  \param whiteList if provided, only labels in the list will be kept
111  \param negateList if true, the white list logic will be inverted: only labels
112  not in the list will be kept
113 
114  \return a map of the fragments and their labels
115 
116 */
117 template <typename T>
118 std::map<T, boost::shared_ptr<ROMol> > getMolFragsWithQuery(
119  const ROMol &mol, T (*query)(const ROMol &, const Atom *),
120  bool sanitizeFrags = true, const std::vector<T> *whiteList = 0,
121  bool negateList = false);
122 
123 #if 0
124  //! finds a molecule's minimum spanning tree (MST)
125  /*!
126  \param mol the molecule of interest
127  \param mst used to return the MST as a vector of bond indices
128  */
129  void findSpanningTree(const ROMol &mol,std::vector<int> &mst);
130 #endif
131 
132 //! calculates Balaban's J index for the molecule
133 /*!
134  \param mol the molecule of interest
135  \param useBO toggles inclusion of the bond order in the calculation
136  (when false, we're not really calculating the J value)
137  \param force forces the calculation (instead of using cached results)
138  \param bondPath when included, only paths using bonds whose indices occur
139  in this vector will be included in the calculation
140  \param cacheIt If this is true, the calculated value will be cached
141  as a property on the molecule
142  \return the J index
143 
144 */
145 double computeBalabanJ(const ROMol &mol, bool useBO = true, bool force = false,
146  const std::vector<int> *bondPath = 0,
147  bool cacheIt = true);
148 //! \overload
149 double computeBalabanJ(double *distMat, int nb, int nAts);
150 
151 //! \name Dealing with hydrogens
152 //@{
153 
154 //! returns a copy of a molecule with hydrogens added in as explicit Atoms
155 /*!
156  \param mol the molecule to add Hs to
157  \param explicitOnly (optional) if this is \c true, only explicit Hs will be
158  added
159  \param addCoords (optional) If this is true, estimates for the atomic
160  coordinates
161  of the added Hs will be used.
162  \param onlyOnAtoms (optional) if provided, this should be a vector of
163  IDs of the atoms that will be considered for H addition.
164 
165  \return the new molecule
166 
167  <b>Notes:</b>
168  - it makes no sense to use the \c addCoords option if the molecule's
169  heavy
170  atoms don't already have coordinates.
171  - the caller is responsible for <tt>delete</tt>ing the pointer this
172  returns.
173  */
174 ROMol *addHs(const ROMol &mol, bool explicitOnly = false,
175  bool addCoords = false, const UINT_VECT *onlyOnAtoms = NULL);
176 //! \overload
177 // modifies the molecule in place
178 void addHs(RWMol &mol, bool explicitOnly = false, bool addCoords = false,
179  const UINT_VECT *onlyOnAtoms = NULL);
180 
181 //! returns a copy of a molecule with hydrogens removed
182 /*!
183  \param mol the molecule to remove Hs from
184  \param implicitOnly (optional) if this is \c true, only implicit Hs will be
185  removed
186  \param updateExplicitCount (optional) If this is \c true, when explicit Hs
187  are removed
188  from the graph, the heavy atom to which they are bound will have its
189  counter of
190  explicit Hs increased.
191  \param sanitize (optional) If this is \c true, the final molecule will be
192  sanitized
193 
194  \return the new molecule
195 
196  <b>Notes:</b>
197  - Hydrogens which aren't connected to a heavy atom will not be
198  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
199  all atoms removed.
200  - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1),
201  will not be removed.
202  - two coordinate Hs, like the central H in C[H-]C, will not be removed
203  - Hs connected to dummy atoms will not be removed
204 
205  - the caller is responsible for <tt>delete</tt>ing the pointer this
206  returns.
207 */
208 ROMol *removeHs(const ROMol &mol, bool implicitOnly = false,
209  bool updateExplicitCount = false, bool sanitize = true);
210 //! \overload
211 // modifies the molecule in place
212 void removeHs(RWMol &mol, bool implicitOnly = false,
213  bool updateExplicitCount = false, bool sanitize = true);
214 
215 //! returns a copy of a molecule with hydrogens removed and added as queries
216 //! to the heavy atoms to which they are bound.
217 /*!
218  This is really intended to be used with molecules that contain QueryAtoms
219 
220  \param mol the molecule to remove Hs from
221 
222  \return the new molecule
223 
224  <b>Notes:</b>
225  - Atoms that do not already have hydrogen count queries will have one
226  added, other H-related queries will not be touched. Examples:
227  - C[H] -> [C;!H0]
228  - [C;H1][H] -> [C;H1]
229  - [C;H2][H] -> [C;H2]
230  - Hydrogens which aren't connected to a heavy atom will not be
231  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
232  all atoms removed.
233  - the caller is responsible for <tt>delete</tt>ing the pointer this returns.
234  - By default all hydrogens are removed, however if
235  mergeUnmappedOnly is true, any hydrogen participating
236  in an atom map will be retained
237 
238 */
239 ROMol *mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly = false);
240 //! \overload
241 // modifies the molecule in place
242 void mergeQueryHs(RWMol &mol, bool mergeUnmappedOnly = false);
243 
244 typedef enum {
248  ADJUST_SETALL = 0xFFFFFFF
251  bool adjustDegree; /**< add degree queries */
253  bool adjustRingCount; /**< add ring-count queries */
255 
256  bool makeDummiesQueries; /**< convert dummy atoms without isotope labels to
257  any-atom queries */
258 
260  : adjustDegree(true),
261  adjustDegreeFlags(ADJUST_SETALL),
262  adjustRingCount(false),
263  adjustRingCountFlags(ADJUST_SETALL),
264  makeDummiesQueries(true)
265 
266  {}
267 };
268 //! returns a copy of a molecule with query properties adjusted
269 /*!
270  \param mol the molecule to adjust
271  \param params controls the adjustments made
272 
273  \return the new molecule
274 */
275 ROMol *adjustQueryProperties(const ROMol &mol,
276  const AdjustQueryParameters *params = NULL);
277 //! \overload
278 // modifies the molecule in place
279 void adjustQueryProperties(RWMol &mol,
280  const AdjustQueryParameters *params = NULL);
281 
282 //! returns a copy of a molecule with the atoms renumbered
283 /*!
284 
285  \param mol the molecule to work with
286  \param newOrder the new ordering of the atoms (should be numAtoms long)
287  for example: if newOrder is [3,2,0,1], then atom 3 in the original
288  molecule will be atom 0 in the new one
289 
290  \return the new molecule
291 
292  <b>Notes:</b>
293  - the caller is responsible for <tt>delete</tt>ing the pointer this returns.
294 
295 */
296 ROMol *renumberAtoms(const ROMol &mol,
297  const std::vector<unsigned int> &newOrder);
298 
299 //@}
300 
301 //! \name Sanitization
302 //@{
303 
304 typedef enum {
316  SANITIZE_ALL = 0xFFFFFFF
317 } SanitizeFlags;
318 
319 //! \brief carries out a collection of tasks for cleaning up a molecule and
320 //! ensuring
321 //! that it makes "chemical sense"
322 /*!
323  This function calls the following in sequence
324  -# MolOps::cleanUp()
325  -# mol.updatePropertyCache()
326  -# MolOps::symmetrizeSSSR()
327  -# MolOps::Kekulize()
328  -# MolOps::assignRadicals()
329  -# MolOps::setAromaticity()
330  -# MolOps::setConjugation()
331  -# MolOps::setHybridization()
332  -# MolOps::cleanupChirality()
333  -# MolOps::adjustHs()
334 
335  \param mol : the RWMol to be cleaned
336 
337  \param operationThatFailed : the first (if any) sanitization operation that
338  fails is set here.
339  The values are taken from the \c SanitizeFlags
340  enum.
341  On success, the value is \c
342  SanitizeFlags::SANITIZE_NONE
343 
344  \param sanitizeOps : the bits here are used to set which sanitization
345  operations are carried
346  out. The elements of the \c SanitizeFlags enum define
347  the operations.
348 
349  <b>Notes:</b>
350  - If there is a failure in the sanitization, a \c SanitException
351  will be thrown.
352  - in general the user of this function should cast the molecule following
353  this
354  function to a ROMol, so that new atoms and bonds cannot be added to the
355  molecule and screw up the sanitizing that has been done here
356 */
357 void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed,
358  unsigned int sanitizeOps = SANITIZE_ALL);
359 //! \overload
360 void sanitizeMol(RWMol &mol);
361 
362 //! Sets up the aromaticity for a molecule
363 /*!
364 
365  This is what happens here:
366  -# find all the simple rings by calling the findSSSR function
367  -# loop over all the Atoms in each ring and mark them if they are
368  candidates
369  for aromaticity. A ring atom is a candidate if it can spare electrons
370  to the ring and if it's from the first two rows of the periodic table.
371  -# based on the candidate atoms, mark the rings to be either candidates
372  or non-candidates. A ring is a candidate only if all its atoms are
373  candidates
374  -# apply Hueckel rule to each of the candidate rings to check if the ring
375  can be
376  aromatic
377 
378  \param mol the RWMol of interest
379 
380  \return 1 on success, 0 otherwise
381 
382  <b>Assumptions:</b>
383  - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
384  been called)
385 
386 */
387 int setAromaticity(RWMol &mol);
388 
389 //! Designed to be called by the sanitizer to handle special cases before
390 //! anything is done.
391 /*!
392 
393  Currently this:
394  - modifies nitro groups, so that the nitrogen does not have an unreasonable
395  valence of 5, as follows:
396  - the nitrogen gets a positive charge
397  - one of the oxygens gets a negative charge and the double bond to this
398  oxygen is changed to a single bond
399  The net result is that nitro groups can be counted on to be:
400  \c "[N+](=O)[O-]"
401  - modifies halogen-oxygen containing species as follows:
402  \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
403  \c [Cl,Br,I](=O)(=O)O -> [X+3]([O-])([O-])O
404  \c [Cl,Br,I](=O)O -> [X+]([O-])O
405  - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-*
406 
407  \param mol the molecule of interest
408 
409 */
410 void cleanUp(RWMol &mol);
411 
412 //! Called by the sanitizer to assign radical counts to atoms
413 void assignRadicals(RWMol &mol);
414 
415 //! adjust the number of implicit and explicit Hs for special cases
416 /*!
417 
418  Currently this:
419  - modifies aromatic nitrogens so that, when appropriate, they have an
420  explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1")
421 
422  \param mol the molecule of interest
423 
424  <b>Assumptions</b>
425  - this is called after the molecule has been sanitized,
426  aromaticity has been perceived, and the implicit valence of
427  everything has been calculated.
428 
429 */
430 void adjustHs(RWMol &mol);
431 
432 //! Kekulizes the molecule
433 /*!
434 
435  \param mol the molecule of interest
436  \param markAtomsBonds if this is set to true, \c isAromatic boolean settings
437  on both the Bonds and Atoms are turned to false
438  following
439  the Kekulization, otherwise they are left alone in
440  their
441  original state.
442  \param maxBackTracks the maximum number of attempts at back-tracking. The
443  algorithm
444  uses a back-tracking procedure to revisit a previous
445  setting of
446  double bond if we hit a wall in the kekulization
447  process
448 
449  <b>Notes:</b>
450  - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
451  bonds will be changed from \c RDKit::Bond::AROMATIC to \c
452  RDKit::Bond::SINGLE
453  or RDKit::Bond::DOUBLE during Kekulization.
454 
455 */
456 void Kekulize(RWMol &mol, bool markAtomsBonds = true,
457  unsigned int maxBackTracks = 100);
458 
459 //! flags the molecule's conjugated bonds
460 void setConjugation(ROMol &mol);
461 
462 //! calculates and sets the hybridization of all a molecule's Atoms
463 void setHybridization(ROMol &mol);
464 
465 //@}
466 
467 //! \name Ring finding and SSSR
468 //@{
469 
470 //! finds a molecule's Smallest Set of Smallest Rings
471 /*!
472  Currently this implements a modified form of Figueras algorithm
473  (JCICS - Vol. 36, No. 5, 1996, 986-991)
474 
475  \param mol the molecule of interest
476  \param res used to return the vector of rings. Each entry is a vector with
477  atom indices. This information is also stored in the molecule's
478  RingInfo structure, so this argument is optional (see overload)
479 
480  \return number of smallest rings found
481 
482  Base algorithm:
483  - The original algorithm starts by finding representative degree 2
484  nodes.
485  - Representative because if a series of deg 2 nodes are found only
486  one of them is picked.
487  - The smallest ring around each of them is found.
488  - The bonds that connect to this degree 2 node are then chopped off,
489  yielding
490  new deg two nodes
491  - The process is repeated on the new deg 2 nodes.
492  - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
493  with it is found. A bond from this is "carefully" (look in the paper)
494  selected and chopped, yielding deg 2 nodes. The process is same as
495  above once this is done.
496 
497  Our Modifications:
498  - If available, more than one smallest ring around a representative deg 2
499  node will be computed and stored
500  - Typically 3 rings are found around a degree 3 node (when no deg 2s are
501  available)
502  and all the bonds to that node are chopped.
503  - The extra rings that were found in this process are removed after all the
504  nodes
505  have been covered.
506 
507  These changes were motivated by several factors:
508  - We believe the original algorithm fails to find the correct SSSR
509  (finds the correct number of them but the wrong ones) on some sample mols
510  - Since SSSR may not be unique, a post-SSSR step to symmetrize may be done.
511  The extra rings this process adds can be quite useful.
512 */
513 int findSSSR(const ROMol &mol, std::vector<std::vector<int> > &res);
514 //! \overload
515 int findSSSR(const ROMol &mol, std::vector<std::vector<int> > *res = 0);
516 
517 //! use a DFS algorithm to identify ring bonds and atoms in a molecule
518 /*!
519  \b NOTE: though the RingInfo structure is populated by this function,
520  the only really reliable calls that can be made are to check if
521  mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
522  return values >0
523 */
524 void fastFindRings(const ROMol &mol);
525 
526 //! symmetrize the molecule's Smallest Set of Smallest Rings
527 /*!
528  SSSR rings obtained from "findSSSR" can be non-unique in some cases.
529  For example, cubane has five SSSR rings, not six as one would hope.
530 
531  This function adds additional rings to the SSSR list if necessary
532  to make the list symmetric, e.g. all atoms in cubane will be part of the same
533  number
534  of SSSRs. This function chooses these extra rings from the extra rings
535  computed
536  and discarded during findSSSR. The new rings are chosen such that:
537  - replacing a same sized ring in the SSSR list with an extra ring yields
538  the same union of bond IDs as the original SSSR list
539 
540  \param mol - the molecule of interest
541  \param res used to return the vector of rings. Each entry is a vector with
542  atom indices. This information is also stored in the molecule's
543  RingInfo structure, so this argument is optional (see overload)
544 
545  \return the total number of rings = (new rings + old SSSRs)
546 
547  <b>Notes:</b>
548  - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
549  first
550 */
551 int symmetrizeSSSR(ROMol &mol, std::vector<std::vector<int> > &res);
552 //! \overload
553 int symmetrizeSSSR(ROMol &mol);
554 
555 //@}
556 
557 //! \name Shortest paths and other matrices
558 //@{
559 
560 //! returns a molecule's adjacency matrix
561 /*!
562  \param mol the molecule of interest
563  \param useBO toggles use of bond orders in the matrix
564  \param emptyVal sets the empty value (for non-adjacent atoms)
565  \param force forces calculation of the matrix, even if already
566  computed
567  \param propNamePrefix used to set the cached property name
568 
569  \return the adjacency matrix.
570 
571  <b>Notes</b>
572  - The result of this is cached in the molecule's local property dictionary,
573  which will handle deallocation. The caller should <b>not</b> \c delete
574  this pointer.
575 
576 */
577 double *getAdjacencyMatrix(const ROMol &mol, bool useBO = false,
578  int emptyVal = 0, bool force = false,
579  const char *propNamePrefix = 0,
580  const boost::dynamic_bitset<> *bondsToUse = 0);
581 
582 //! Computes the molecule's topological distance matrix
583 /*!
584  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
585 
586  \param mol the molecule of interest
587  \param useBO toggles use of bond orders in the matrix
588  \param useAtomWts sets the diagonal elements of the result to
589  6.0/(atomic number) so that the matrix can be used to calculate
590  Balaban J values. This does not affect the bond weights.
591  \param force forces calculation of the matrix, even if already
592  computed
593  \param propNamePrefix used to set the cached property name
594 
595  \return the distance matrix.
596 
597  <b>Notes</b>
598  - The result of this is cached in the molecule's local property dictionary,
599  which will handle deallocation. The caller should <b>not</b> \c delete
600  this pointer.
601 
602 
603 */
604 double *getDistanceMat(const ROMol &mol, bool useBO = false,
605  bool useAtomWts = false, bool force = false,
606  const char *propNamePrefix = 0);
607 
608 //! Computes the molecule's topological distance matrix
609 /*!
610  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
611 
612  \param mol the molecule of interest
613  \param activeAtoms only elements corresponding to these atom indices
614  will be included in the calculation
615  \param bonds only bonds found in this list will be included in the
616  calculation
617  \param useBO toggles use of bond orders in the matrix
618  \param useAtomWts sets the diagonal elements of the result to
619  6.0/(atomic number) so that the matrix can be used to calculate
620  Balaban J values. This does not affect the bond weights.
621 
622  \return the distance matrix.
623 
624  <b>Notes</b>
625  - The results of this call are not cached, the caller <b>should</b> \c
626  delete
627  this pointer.
628 
629 
630 */
631 double *getDistanceMat(const ROMol &mol, const std::vector<int> &activeAtoms,
632  const std::vector<const Bond *> &bonds,
633  bool useBO = false, bool useAtomWts = false);
634 
635 //! Computes the molecule's 3D distance matrix
636 /*!
637 
638  \param mol the molecule of interest
639  \param confId the conformer to use
640  \param useAtomWts sets the diagonal elements of the result to
641  6.0/(atomic number)
642  \param force forces calculation of the matrix, even if already
643  computed
644  \param propNamePrefix used to set the cached property name
645  (if set to an empty string, the matrix will not be
646  cached)
647 
648  \return the distance matrix.
649 
650  <b>Notes</b>
651  - The result of this is cached in the molecule's local property dictionary,
652  which will handle deallocation. The caller should <b>not</b> \c delete
653  this pointer.
654 
655 */
656 double *get3DDistanceMat(const ROMol &mol, int confId = -1,
657  bool useAtomWts = false, bool force = false,
658  const char *propNamePrefix = 0);
659 //! Find the shortest path between two atoms
660 /*!
661  Uses the Bellman-Ford algorithm
662 
663  \param mol molecule of interest
664  \param aid1 index of the first atom
665  \param aid2 index of the second atom
666 
667  \return an std::list with the indices of the atoms along the shortest
668  path
669 
670  <b>Notes:</b>
671  - the starting and end atoms are included in the path
672  - if no path is found, an empty path is returned
673 
674 */
675 std::list<int> getShortestPath(const ROMol &mol, int aid1, int aid2);
676 
677 //@}
678 
679 #if 0
680  //! \name Canonicalization
681  //@{
682 
683  //! assign a canonical ordering to a molecule's atoms
684  /*!
685  The algorithm used here is a modification of the published Daylight canonical
686  smiles algorithm (i.e. it uses atom invariants and products of primes).
687 
688  \param mol the molecule of interest
689  \param ranks used to return the ranks
690  \param breakTies toggles breaking of ties (see below)
691  \param includeChirality toggles inclusion of chirality in the invariants
692  \param includeIsotopes toggles inclusion of isotopes in the invariants
693  \param rankHistory used to return the rank history (see below)
694 
695  <b>Notes:</b>
696  - Tie breaking should be done when it's important to have a full ordering
697  of the atoms (e.g. when generating canonical traversal trees). If it's
698  acceptable to have ties between symmetry-equivalent atoms (e.g. when
699  generating CIP codes), tie breaking can/should be skipped.
700  - if the \c rankHistory argument is provided, the evolution of the ranks of
701  individual atoms will be tracked. The \c rankHistory pointer should be
702  to a VECT_INT_VECT that has at least \c mol.getNumAtoms() elements.
703  */
704  void rankAtoms(const ROMol &mol,std::vector<int> &ranks,
705  bool breakTies=true,
706  bool includeChirality=true,
707  bool includeIsotopes=true,
708  std::vector<std::vector<int> > *rankHistory=0);
709  //! assign a canonical ordering to a sub-molecule's atoms
710  /*!
711  The algorithm used here is a modification of the published Daylight canonical
712  smiles algorithm (i.e. it uses atom invariants and products of primes).
713 
714  \param mol the molecule of interest
715  \param atomsToUse atoms to be included
716  \param bondsToUse bonds to be included
717  \param atomSymbols symbols to use for the atoms in the output (these are
718  used in place of atomic number and isotope information)
719  \param ranks used to return the ranks
720  \param breakTies toggles breaking of ties (see below)
721  \param rankHistory used to return the rank history (see below)
722 
723  <b>Notes:</b>
724  - Tie breaking should be done when it's important to have a full ordering
725  of the atoms (e.g. when generating canonical traversal trees). If it's
726  acceptable to have ties between symmetry-equivalent atoms (e.g. when
727  generating CIP codes), tie breaking can/should be skipped.
728  - if the \c rankHistory argument is provided, the evolution of the ranks of
729  individual atoms will be tracked. The \c rankHistory pointer should be
730  to a VECT_INT_VECT that has at least \c mol.getNumAtoms() elements.
731  */
732  void rankAtomsInFragment(const ROMol &mol,std::vector<int> &ranks,
733  const boost::dynamic_bitset<> &atomsToUse,
734  const boost::dynamic_bitset<> &bondsToUse,
735  const std::vector<std::string> *atomSymbols=0,
736  const std::vector<std::string> *bondSymbols=0,
737  bool breakTies=true,
738  std::vector<std::vector<int> > *rankHistory=0);
739 
740  // @}
741 #endif
742 //! \name Stereochemistry
743 //@{
744 
745 //! removes bogus chirality markers (those on non-sp3 centers):
746 void cleanupChirality(RWMol &mol);
747 
748 //! \brief Uses a conformer to assign ChiralType to a molecule's atoms
749 /*!
750  \param mol the molecule of interest
751  \param confId the conformer to use
752  \param replaceExistingTags if this flag is true, any existing atomic chiral
753  tags will be replaced
754 
755  If the conformer provided is not a 3D conformer, nothing will be done.
756 */
757 void assignChiralTypesFrom3D(ROMol &mol, int confId = -1,
758  bool replaceExistingTags = true);
759 
760 //! Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
761 /*!
762 
763  \param mol the molecule of interest
764  \param cleanIt toggles removal of stereo flags from double bonds that can
765  not have stereochemistry
766  \param force forces the calculation to be repeated even if it has
767  already been done
768  \param flagPossibleStereoCenters set the _ChiralityPossible property on
769  atoms that are possible stereocenters
770 
771  <b>Notes:</b>
772  - Throughout we assume that we're working with a hydrogen-suppressed
773  graph.
774 
775 */
776 void assignStereochemistry(ROMol &mol, bool cleanIt = false, bool force = false,
777  bool flagPossibleStereoCenters = false);
778 //! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
779 //! (i.e. Z/E)
780 /*!
781 
782  \param mol the molecule of interest
783 */
784 void removeStereochemistry(ROMol &mol);
785 
786 //! \brief finds bonds that could be cis/trans in a molecule and mark them as
787 //! Bond::STEREONONE
788 /*!
789  \param mol the molecule of interest
790  \param cleanIt toggles removal of stereo flags from double bonds that can
791  not have stereochemistry
792 
793  This function is useful in two situations
794  - when parsing a mol file; for the bonds marked here, coordinate
795  information
796  on the neighbors can be used to identify cis or trans states
797  - when writing a mol file; bonds that can be cis/trans but not marked as
798  either
799  need to be specially marked in the mol file
800 */
801 void findPotentialStereoBonds(ROMol &mol, bool cleanIt = false);
802 //@}
803 
804 //! returns the number of atoms which have a particular property set
805 unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop);
806 
807 }; // end of namespace MolOps
808 }; // end of namespace RDKit
809 
810 #endif
ROMol * renumberAtoms(const ROMol &mol, const std::vector< unsigned int > &newOrder)
returns a copy of a molecule with the atoms renumbered
std::map< T, boost::shared_ptr< ROMol > > getMolFragsWithQuery(const ROMol &mol, T(*query)(const ROMol &, const Atom *), bool sanitizeFrags=true, const std::vector< T > *whiteList=0, bool negateList=false)
splits a molecule into pieces based on labels assigned using a query
std::list< int > getShortestPath(const ROMol &mol, int aid1, int aid2)
Find the shortest path between two atoms.
void fastFindRings(const ROMol &mol)
use a DFS algorithm to identify ring bonds and atoms in a molecule
unsigned int getMolFrags(const ROMol &mol, std::vector< int > &mapping)
find fragments (disconnected components of the molecular graph)
void Kekulize(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
Kekulizes the molecule.
ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
double computeBalabanJ(const ROMol &mol, bool useBO=true, bool force=false, const std::vector< int > *bondPath=0, bool cacheIt=true)
calculates Balaban's J index for the molecule
void assignChiralTypesFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralType to a molecule's atoms.
int findSSSR(const ROMol &mol, std::vector< std::vector< int > > &res)
finds a molecule's Smallest Set of Smallest Rings
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:30
ROMol * addHs(const ROMol &mol, bool explicitOnly=false, bool addCoords=false, const UINT_VECT *onlyOnAtoms=NULL)
returns a copy of a molecule with hydrogens added in as explicit Atoms
const int ci_LOCAL_INF
int symmetrizeSSSR(ROMol &mol, std::vector< std::vector< int > > &res)
symmetrize the molecule's Smallest Set of Smallest Rings
int setAromaticity(RWMol &mol)
Sets up the aromaticity for a molecule.
void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, unsigned int sanitizeOps=SANITIZE_ALL)
carries out a collection of tasks for cleaning up a molecule and
ROMol * adjustQueryProperties(const ROMol &mol, const AdjustQueryParameters *params=NULL)
returns a copy of a molecule with query properties adjusted
int countAtomElec(const Atom *at)
return the number of electrons available on an atom to donate for
AdjustQueryWhichFlags adjustDegreeFlags
Definition: MolOps.h:252
INVAR_VECT::iterator INVAR_VECT_I
Definition: MolOps.h:28
void cleanupChirality(RWMol &mol)
removes bogus chirality markers (those on non-sp3 centers):
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:102
bool atomHasConjugatedBond(const Atom *at)
returns whether or not the given Atom is involved in a conjugated bond
int getFormalCharge(const ROMol &mol)
sums up all atomic formal charges and returns the result
AdjustQueryWhichFlags adjustRingCountFlags
Definition: MolOps.h:254
unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop)
returns the number of atoms which have a particular property set
double * getDistanceMat(const ROMol &mol, bool useBO=false, bool useAtomWts=false, bool force=false, const char *propNamePrefix=0)
Computes the molecule's topological distance matrix.
void assignRadicals(RWMol &mol)
Called by the sanitizer to assign radical counts to atoms.
void setConjugation(ROMol &mol)
flags the molecule's conjugated bonds
INVAR_VECT::const_iterator INVAR_VECT_CI
Definition: MolOps.h:29
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
ROMol * mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false)
void assignStereochemistry(ROMol &mol, bool cleanIt=false, bool force=false, bool flagPossibleStereoCenters=false)
Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
void removeStereochemistry(ROMol &mol)
Removes all stereochemistry information from atoms (i.e. R/S) and bonds.
class for representing a bond
Definition: Bond.h:46
void cleanUp(RWMol &mol)
Designed to be called by the sanitizer to handle special cases before.
void setHybridization(ROMol &mol)
calculates and sets the hybridization of all a molecule's Atoms
double * getAdjacencyMatrix(const ROMol &mol, bool useBO=false, int emptyVal=0, bool force=false, const char *propNamePrefix=0, const boost::dynamic_bitset<> *bondsToUse=0)
returns a molecule's adjacency matrix
std::vector< UINT > UINT_VECT
Definition: types.h:165
std::vector< double > INVAR_VECT
Definition: MolOps.h:26
double * get3DDistanceMat(const ROMol &mol, int confId=-1, bool useAtomWts=false, bool force=false, const char *propNamePrefix=0)
Computes the molecule's 3D distance matrix.
void adjustHs(RWMol &mol)
adjust the number of implicit and explicit Hs for special cases
AdjustQueryWhichFlags
Definition: MolOps.h:244
void findPotentialStereoBonds(ROMol &mol, bool cleanIt=false)
finds bonds that could be cis/trans in a molecule and mark them as Bond::STEREONONE ...
The class for representing atoms.
Definition: Atom.h:67