libfreecontact  1.0.21
freecontact.cpp
Go to the documentation of this file.
1 /* FreeContact - program to predict protein residue contacts from a sufficiently large multiple alignment
2 * Copyright (C) 2012-2013 Laszlo Kajan <lkajan@rostlab.org> Rost Lab, Technical University of Munich, Germany
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17 #include "config.h"
18 
19 #ifdef HAVE_OPENMP
20 #include <omp.h>
21 #endif
22 #include <boost/format.hpp>
23 #include <cblas.h>
24 #include <iomanip>
25 #include <iostream>
26 //#include <memory> // auto_ptr, unique_ptr
27 #include <signal.h>
28 #include <stdio.h>
29 #include <sys/time.h>
30 #include <time.h>
31 #include <unistd.h>
32 #include "freecontact.h"
33 
34 // lkajan: EVfold-mfDCA is calculate_evolutionary_constraints.m
35 
36 namespace bo = boost;
37 
38 typedef float g_fp_t;
39 // n S L thr maxIt msg warm X W Wd WXj info brk
40 extern "C" void glassofast_(int *, g_fp_t *, g_fp_t *, g_fp_t *, int *, int *, int *, g_fp_t *, g_fp_t *, g_fp_t *, g_fp_t *, int *, int *);
41 
42 extern "C" {
43  // LAPACK Cholesky
44  void spotrf_(const char* UPLO, const int* N, float* A, const int* LDA, int* INFO);
45 
46  // LU decomoposition of a general matrix
47  void sgetrf_(int* M, int* N, float* A, int* LDA, int* IPIV, int* INFO);
48 
49  // generate inverse of a matrix given its LU decomposition
50  void sgetri_(int* N, float* A, int* lda, int* IPIV, float* WORK, int* LWORK, int* INFO);
51 }
52 
53 namespace bo = boost;
54 using namespace std;
55 namespace freecontact {
56 
57 // When you change these, rename BLAS/LAPACK calls for correct precision.
58 typedef float cov_fp_t;
59 typedef float fp_t;
60 
61 template<typename _Tp>
62 class ct_vector : public std::vector<_Tp> {
63  public:
64  uint16_t alilen;
65 
66  ct_vector(uint16_t __alilen = 0) : std::vector<_Tp>(__alilen*__alilen), alilen(__alilen) {}
67 
68  // Does not check range.
69  inline _Tp& operator()(uint16_t __i, uint16_t __j)
70  {
71  return this->_M_impl._M_start[ __i*alilen + __j ];
72  }
73  // Does not check range.
74  inline const _Tp& operator()(uint16_t __i, uint16_t __j) const
75  {
76  return this->_M_impl._M_start[ __i*alilen + __j ];
77  }
78 };
79 
80 template<typename _Tp>
81 class af_vector : public std::vector<_Tp> {
82  public:
83  uint16_t alilen;
84  uint8_t q; // letters
85 
86  af_vector(uint16_t __alilen, uint8_t __q, _Tp __v) : std::vector<_Tp>(__alilen*__q, __v), alilen(__alilen), q(__q) {}
87 
88  inline _Tp& operator()(uint16_t __i, uint8_t __ai)
89  {
90  return this->_M_impl._M_start[ __i*q + __ai ];
91  }
92  inline const _Tp& operator()(uint16_t __i, uint8_t __ai) const
93  {
94  return this->_M_impl._M_start[ + __i*q + __ai ];
95  }
96 };
97 
98 class pf_vector : public std::vector<predictor::pairfreq_t> {
99  typedef predictor::pairfreq_t _Tp;
100  public:
101  uint16_t alilen;
102  uint8_t q; // letters
103  size_t d3, d2, d1;
104 
105  pf_vector() : std::vector<_Tp>(), alilen(0), q(0), d3(0), d2(0), d1(0) {}
106  pf_vector(uint16_t __alilen, uint8_t __q, _Tp __v) : std::vector<_Tp>(__alilen*__alilen*__q*__q, __v), alilen(__alilen), q(__q), d3(q), d2(q*d3), d1(alilen*d2) {}
107 
108  inline _Tp& operator()(uint16_t __i, uint16_t __j, uint8_t __ai, uint8_t __aj)
109  {
110  return this->_M_impl._M_start[ (__i)*d1 + (__j)*d2 + (__ai)*d3 + (__aj) ];
111  }
112  inline const _Tp& operator()(uint16_t __i, uint16_t __j, uint8_t __ai, uint8_t __aj) const
113  {
114  return this->_M_impl._M_start[ (__i)*d1 + (__j)*d2 + (__ai)*d3 + (__aj) ];
115  }
116 };
117 
118 template<typename _Tp>
119 class cov_vector : public std::vector<_Tp> {
120  public:
121  uint16_t alilen;
122  uint8_t q; // letters
123  size_t d1;
124 
125  cov_vector() : std::vector<_Tp>(), alilen(0), q(0), d1(0) {}
126  cov_vector(uint16_t __alilen, uint8_t __q) : std::vector<_Tp>(__alilen*__q*__alilen*__q),
127  alilen(__alilen), q(__q), d1(alilen*q) {}
128  cov_vector(uint16_t __alilen, uint8_t __q, _Tp __v) : std::vector<_Tp>(__alilen*__q*__alilen*__q, __v),
129  alilen(__alilen), q(__q), d1(alilen*q) {}
130 
131  // 2D
132  inline _Tp& operator()(size_t __row, size_t __col)
133  {
134  return this->_M_impl._M_start[ __row*d1 + __col ];
135  }
136  inline const _Tp& operator()(size_t __row, size_t __col) const
137  {
138  return this->_M_impl._M_start[ __row*d1 + __col ];
139  }
140  // 4D
141  inline _Tp& operator()(uint16_t __i, uint16_t __j, uint8_t __ai, uint8_t __aj)
142  {
143  return this->_M_impl._M_start[ (__i*q+__ai)*d1 + __j*q+__aj ];
144  }
145  inline const _Tp& operator()(uint16_t __i, uint16_t __j, uint8_t __ai, uint8_t __aj) const
146  {
147  return this->_M_impl._M_start[ (__i*q+__ai)*d1 + __j*q+__aj ];
148  }
149 };
150 
153  public:
154  virtual double operator()(const uint16_t i, const uint16_t j) const = 0;
155 };
156 
158 template<typename _Tp, typename _Tri = size_t, typename _Tci = size_t>
159 class d2matrix : public std::vector<_Tp> {
160  public:
161  _Tri rows;
162  _Tci cols;
163 
164  d2matrix(_Tri __rows, _Tci __cols) : std::vector<_Tp>(__rows*__cols), rows(__rows), cols(__cols)
165  {
166  memset(this->_M_impl._M_start, 0, rows*cols*sizeof(_Tp));
167  }
168  inline _Tp& operator()(_Tri __r, _Tci __c)
169  {
170  return this->_M_impl._M_start[ __r*cols + __c ];
171  }
172  inline const _Tp& operator()(_Tri __r, _Tci __c) const
173  {
174  return this->_M_impl._M_start[ __r*cols + __c ];
175  }
176 };
178 
179 static void _raw_score_matrix(map<string, ct_vector<fp_t> > &__raw_ctscore, map<string, vector<double> > &__apc_bg, map<string, double> &__apc_mean,
180  const uint16_t __alilen, const string &__key, const _rawscore_calc_t &__fo );
181 
182 static vector<contact_t>
183  _apc( const ct_vector<fp_t>& __raw_ctscore, const vector<double>& __apc_bg, const double __apc_mean, const uint16_t __mincontsep, bool __filt );
184 
185 static inline vector<contact_t>
186  _raw_as_is( const ct_vector<fp_t>& __raw_ctscore, const uint16_t __mincontsep );
187 
188 static inline uint32_t
189  _cache_holds_nseq(uint16_t __seqsize);
190 
191 #ifndef __SSE2__
192 static inline __m128i _mm_setzero_si128(){ __m128i m; long long *mp = (long long *)&m; mp[1] = mp[0] = 0; return m; }
193 #endif
194 
195 
197  timer_t _timerid;
198 
199  static void _brk(sigval_t __sigval)
200  {
201  *static_cast<int*>(__sigval.sival_ptr) = 1;
202  }
203 
204  // this is a resource - disable copy contructor and copy assignment
205  _glasso_timer(const _glasso_timer&){}
206  _glasso_timer& operator=(const _glasso_timer&){return *this;}
207  public:
208  bool dbg;
209  _glasso_timer(int *__sival_ptr, time_t __tv_sec, bool __dbg = false) : dbg(__dbg)
210  {
211  struct sigevent sev;
212  sev.sigev_notify = SIGEV_THREAD;
213  sev.sigev_notify_function = _brk;
214  sev.sigev_value.sival_ptr = __sival_ptr;
215  sev.sigev_notify_attributes = NULL;
216  if (timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, &_timerid) == -1)
217  {
218  if (dbg) cerr << "timer_create(CLOCK_PROCESS_CPUTIME_ID, ...): " << strerror(errno) << " at " << __FILE__ << ":" << __LINE__ << "\n";
219  // In case timer_create fails with CLOCK_PROCESS_CPUTIME_ID because clock_getcpuclockid() is not supported (e.g. on kfreebsd):
220  if (timer_create(CLOCK_MONOTONIC, &sev, &_timerid) == -1) throw std::runtime_error(bo::str(bo::format("%s at %s:%d") % strerror(errno) % __FILE__ % __LINE__ ));
221  }
222  struct itimerspec its;
223  its.it_value.tv_sec = __tv_sec;
224  its.it_value.tv_nsec = 0;
225  its.it_interval.tv_sec = 0;
226  its.it_interval.tv_nsec = 0;
227  if (timer_settime(_timerid, 0, &its, NULL) == -1) throw std::runtime_error(bo::str(bo::format("%s at %s:%d") % strerror(errno) % __FILE__ % __LINE__ ));
228  }
229 
230  virtual ~_glasso_timer()
231  {
232  if (timer_delete(_timerid) == -1) throw std::runtime_error(strerror(errno));
233  }
234 };
235 
236 void predictor::get_seq_weights(
237  freq_vec_t &__aliw, double &__wtot,
238  const ali_t& __ali, double __cp,
239  bool __veczw, int __num_threads
240  ) throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error)
241 {
242  if (__ali.seqcnt < 2) throw alilen_error(bo::str(bo::format("alignment size (%d) < 2") % __ali.seqcnt));
243  // lkajan: the present implementation is limited to alilen 4080.
244  if (__ali.alilenpad > 4080) throw alilen_error(bo::str(bo::format("alignment (%d) longer than 4080") % __ali.alilenpad));
245  if (__cp < 0 || __cp > 1) throw range_error(bo::str(bo::format("clustering threshold is out of range [0-1] %g") % __cp ));
246  if (__num_threads < 0) throw range_error(bo::str(bo::format("number of threads is out of range [0-) %d") % __num_threads ));
247 
248  #ifdef HAVE_OPENMP
249  int num_threads = __num_threads ? __num_threads : omp_get_max_threads();
250  if(dbg) cerr << "will use " << num_threads << " OMP threads\n";
251  #else
252  int num_threads = 1;
253  if(dbg) cerr << "no OMP thread support\n";
254  #endif
255 
256  // Calculate BLOSUM sequence weights at clustering threshold __cp: "sequence segments that are identical for at least that percentage of amino acids are grouped together".
257  // A modification of the method in "Amino acid substitution matrices from protein blocks. Henikoff and Henikoff 1992. PNAS.", because the weight is not established for all members of
258  // the cluster from the size of the __cp cluster, but instead individually, from the number of sequences found at >= __cp. This is the method of mfDCA and PSICOV.
259  // Gaps and Xs also contribute to matches/mismatches, notably X matches only [JOUX].
260  time_t t0 = time(NULL);
261 
262  uint16_t mismthresh = __ali.alilen - ceil( __ali.alilen * __cp );
263  int matchthresh = __ali.alilenpad - mismthresh; // minimum number of matches for clustering seq
264 
265  uint32_t cache_holds_nseq = _cache_holds_nseq(__ali.alilenpad);
266  simcnt_t simcnt(__ali.seqcnt, num_threads);
267 
268  // lkajan: allow for other vectorizations as well
269  #define CAN_VECW 0
270  #ifdef __SSE2__
271  #undef CAN_VECW
272  #define CAN_VECW 1
273  #endif
274 
275  if(!CAN_VECW || !__veczw)
276  {
277  // lkajan: The SSE2 implementation came first. Aim was to keep changes to the minimum here. That's why this implementation looks a bit weird.
278  // lkajan: Look out: the iterations take less and less time, so a division by first-half/second-half, as seems to be the default, leads to thread starvation.
279  #ifdef HAVE_OPENMP
280  #pragma omp parallel for num_threads(num_threads), schedule(dynamic)
281  #endif
282  for(uint32_t i=0; i<__ali.seqcnt-1; ++i)
283  {
284  if(dbg
285  #ifdef HAVE_OPENMP
286  && omp_get_thread_num() == 0
287  #endif
288  )
289  { time_t t1 = time(NULL); if( t1 != t0 ) { t0 = t1; fprintf(stderr, "%s%d/%d", "␛[1G␛[K", i, __ali.seqcnt); } } // xterm
290 
291  const uint8_t *a0 = &__ali(i,0);
292 
293  for(uint32_t j=i+1; j<__ali.seqcnt; ++j)
294  {
295  const uint8_t *a = a0, *b = &__ali(j,0);
296  uint16_t matc128 = 0;
297 
298  for(uint16_t k = 0, k_end = __ali.alilenpad; k < k_end; ++k)
299  matc128 += ( a[k] == b[k] );
300 
301  if( matc128 >= matchthresh )
302  {
303  #ifndef HAVE_OPENMP
304  int thread_num = 0;
305  #else
306  int thread_num = omp_get_thread_num();
307  #endif
308  ++simcnt(i,thread_num);
309  ++simcnt(j,thread_num);
310  }
311  }
312  }
313  }
314  #ifdef __SSE2__
315  else
316  {
317  // lkajan: Dynamically set chunk size according to (L1) cache size (cat /sys/devices/system/cpu/cpu0/cache/index*/{coherency_line_size,level,size,type}).
318  // lkajan: Would prefetching with _mm_prefetch(ptr, _MM_HINT_T0) help?
319  uint32_t wchunk = cache_holds_nseq / 2;
320 
321  if(dbg) cerr << "SSE2 veczweight, wchunk = "<< wchunk <<"\n";
322 
323  __m128i m0 = _mm_setzero_si128();
324  __m128i m1 = _mm_cmpeq_epi8( m0, m0 );
325 
326  uint32_t chunk_e = (__ali.seqcnt-2)/wchunk + 1;
327  #ifdef HAVE_OPENMP
328  #pragma omp parallel for num_threads(num_threads), schedule(dynamic)
329  #endif
330  for(uint32_t chunki=0; chunki<chunk_e; ++chunki)
331  {
332  // chunkj == chunki - diagonal
333  {
334  if(dbg
335  #ifdef HAVE_OPENMP
336  && omp_get_thread_num() == 0
337  #endif
338  )
339  { time_t t1 = time(NULL); if( t1 != t0 ) { t0 = t1; fprintf(stderr, "%s%d,%d/%d", "␛[1G␛[K", chunki, chunki, chunk_e); } } // xterm
340 
341  uint32_t i_b = chunki*wchunk;
342  uint32_t i_e = min( i_b+wchunk, __ali.seqcnt-1 );
343  uint32_t j_e = min( i_b+wchunk, __ali.seqcnt );
344  for(uint32_t i=i_b; i<i_e; ++i)
345  {
346  __m128i *a0 = (__m128i*)&__ali(i,0);
347 
348  // lkajan: This is a macro, because the inline function equivalent is 5% slower, although the timings are slightly weird.
349  // m1: matches, counted /down/ from 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
350  // lkajan: My attempts to accelerate the followin by early break on (matc >= matchthresh || (k<<4) - matc > mismthresh) have failed.
351  // matc128 = _mm_add_epi8( m, matc128 ): look out, good only till seq len <= 4080
352 #define _count_matches_sse2_helper(__omp_get_thread_num) {\
353  __m128i *a = a0, *b = (__m128i*)&__ali(j,0);\
354  __m128i matc128 = m1;\
355 \
356  for(uint16_t k = 0, k_end = __ali.alilen16; k < k_end; ++k) \
357  {\
358  __m128i m = _mm_cmpeq_epi8( a[k], b[k] );\
359  matc128 = _mm_add_epi8( m, matc128 );\
360  }\
361 \
362  matc128 = _mm_sad_epu8( m1, matc128 );\
363  matc128 = _mm_add_epi32( matc128,\
364  _mm_srli_si128( matc128, 8 ) );\
365 \
366  if( _mm_cvtsi128_si32(matc128) >= matchthresh )\
367  {\
368  int thread_num = (__omp_get_thread_num);\
369  ++simcnt(i,thread_num);\
370  ++simcnt(j,thread_num);\
371  } }
372 #ifdef HAVE_OPENMP
373 #define _count_matches_sse2() _count_matches_sse2_helper(omp_get_thread_num())
374 #else
375 #define _count_matches_sse2() _count_matches_sse2_helper(0)
376 #endif
377  for(uint32_t j = i+1; j<j_e; ++j)
378  _count_matches_sse2();
379  }
380  }
381 
382  // off-diagonal chunks
383  for(uint32_t chunkj=chunki+1; chunkj<chunk_e; ++chunkj)
384  {
385  if(dbg
386  #ifdef HAVE_OPENMP
387  && omp_get_thread_num() == 0
388  #endif
389  )
390  { time_t t1 = time(NULL); if( t1 != t0 ) { t0 = t1; fprintf(stderr, "%s%d,%d/%d", "␛[1G␛[K", chunki, chunkj, chunk_e); } } // xterm
391 
392  uint32_t i_b = chunki*wchunk;
393  uint32_t i_e = min( i_b+wchunk, __ali.seqcnt-1 );
394  uint32_t j_b = chunkj*wchunk;
395  uint32_t j_e = min( j_b+wchunk, __ali.seqcnt );
396  for(uint32_t i=i_b; i<i_e; ++i)
397  {
398  __m128i *a0 = (__m128i*)&__ali(i,0);
399 
400  for(uint32_t j=j_b; j<j_e; ++j)
401  _count_matches_sse2();
402  }
403  }
404 #undef _count_matches_sse2
405 #undef _count_matches_sse2_helper
406  }// for chunki
407  }
408  #else // should never happen unless bug
409  else throw std::runtime_error(bo::str(bo::format("ooops, there is a bug in %s around %s:%d") % PACKAGE % __FILE__ % __LINE__ ));
410  #endif // __SSE2__
411 
412  __wtot = 0; // total weight of all aligments, EVfold-mfDCA::Meff
413  __aliw = freq_vec_t(__ali.seqcnt); // alignment weight array, EVfold-mfDCA::W
414 
415  for(uint32_t i = 0; i<__ali.seqcnt; ++i)
416  {
417  #ifndef HAVE_OPENMP
418  int thread_num = 0;
419  uint32_t sci = simcnt(i,thread_num);
420  #else
421  uint32_t sci = 0;
422  for(int thread_num = 0; thread_num < num_threads; ++thread_num)
423  sci += simcnt(i,thread_num);
424  #endif
425  __wtot += (__aliw[i] = 1.0 / (1 + sci));
426  }
427 
428  if(dbg) fprintf(stderr, "total weight (variation) of alignment = %16.15g\n", __wtot );
429 
430  return;
431 }
432 
434  predictor::run(const ali_t &__ali, const freq_vec_t &__aliw, const double __wtot,
435  double __dens, double __gapth, uint16_t __mincontsep,
436  double __pseudocnt, double __pscnt_weight, bool __estimate_ivcov, double __shrink_lambda,
437  bool __cov20, bool __apply_gapth, double __rho,
438  int __num_threads, time_t __icme_timeout, time_res_t *__timing
439  ) throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error)
440 {
441  if (__ali.seqcnt < 2) throw alilen_error(bo::str(bo::format("alignment size (%d) < 2") % __ali.seqcnt));
442  if (__ali.seqcnt != __aliw.size()) throw alilen_error(bo::str(bo::format("alignment size (%d) != alignment weight vector size (%d)") % __ali.seqcnt % __aliw.size()));
443  // operations on unsigned loop bounds make this limit necessary
444  if (__ali.alilen < 4) throw alilen_error(bo::str(bo::format("alignment (%d) shorter than 4") % __ali.alilen));
445  if (__ali.alilen < __mincontsep+1) throw alilen_error(bo::str(bo::format("alignment (%d) shorter than __mincontsep (%d) + 1") % __ali.alilen % __mincontsep));
446  // lkajan: PSICOV exits with failure here if wtot < __ali.alilen.
447  //if (__wtot < __ali.alilen) throw alilen_error(bo::str(bo::format("total alignment weight (%g) < alignment length (%d)") % __wtot % __ali.alilen));
448  if(__dens < 0 || __dens > 1) throw range_error(bo::str(bo::format("contact density is out of range [0-1] %g") % __dens ));
449  if(__gapth < 0 || __gapth > 1) throw range_error(bo::str(bo::format("gap threshold is out of range [0-1] %g") % __gapth ));
450  if(__mincontsep < 1) throw range_error(bo::str(bo::format("minimum contact separation is out of range [1-) %d") % __mincontsep ));
451  if(__num_threads < 0) throw range_error(bo::str(bo::format("number of threads is out of range [0-) %d") % __num_threads ));
452  if(__pseudocnt < 0) throw range_error(bo::str(bo::format("pseudo count is out of range [0-) %g") % __pseudocnt ));
453  if(__pscnt_weight < 0 || __pscnt_weight > 1) throw range_error(bo::str(bo::format("pseudo count weight is out of range [0-1] %g") % __pscnt_weight ));
454  if(__shrink_lambda < 0 || __shrink_lambda > 1) throw range_error(bo::str(bo::format("shrinkage lambda is out of range [0-1] %g") % __shrink_lambda ));
455 
456  #ifdef HAVE_OPENMP
457  int num_threads = __num_threads ? __num_threads : omp_get_max_threads();
458  if(dbg) cerr << "will use " << num_threads << " OMP threads\n";
459  #else
460  int num_threads = 1;
461  if(dbg) cerr << "no OMP thread support\n";
462  #endif
463 
464  if(__timing) (*__timing)["num_threads"] = num_threads;
465  //timeval t_run; gettimeofday(&t_run, NULL);
466 
467  // Calculate column-wise AA frequencies with pseudocount and weighting, 21-letter alphabet.
468  const double ps_wtot = __pseudocnt * 21 + __wtot; // lkajan: sum( pa[i][*] ) and sum( pab[i][j][*][*] ) is __wtot, if no pseudocounts are used
469  af_vector<freq_t> aafreq( __ali.alilen, 21, __pseudocnt / ps_wtot ); // aafreq[__ali.alilen][21], this is EVfold-mfDCA::Pi_true with __pseudocnt == 0
470 
471  freq_t ps_aliw[__ali.seqcnt]; memcpy(ps_aliw, __aliw.data(), __ali.seqcnt*sizeof(freq_t));
472  cblas_dscal(__ali.seqcnt, 1/ps_wtot, ps_aliw, 1); // scale vector by a constant
473 
474  for(uint32_t k = 0; k < __ali.seqcnt; ++k)
475  {
476  for(uint16_t j = 0; j < __ali.alilen; ++j)
477  {
478  uint8_t aa = __ali(k,j);
479  if(aa < 21) aafreq(j,aa) += ps_aliw[k]; // if aa is not X or alike
480  }
481  }
482 
483  vector<uint16_t> gap_cols;
484  if(__gapth<1)
485  for(uint16_t i = 0; i < __ali.alilen; ++i)
486  {
487  if( aafreq(i,aamap_gapidx) > __gapth ) gap_cols.push_back(i);
488  }
489 
490  if(dbg) cerr << "calculated column aa frequencies, gap cols = " << gap_cols.size() << "\n";
491 
492  // Calculate AA-pair frequencies with pseudocount and weighting, 21-letter alphabet.
493  pf_vector pairfreq( __ali.alilen, 21, __pseudocnt/21.0 / ps_wtot ); // pairfreq[__ali.alilen][__ali.alilen][21][21], this is EVfold-mfDCA::Pij_true with __pseudocnt == 0
494 
495  timeval t_before;
496  gettimeofday(&t_before, NULL);
497  {
498  for(uint32_t k = 0; k<__ali.seqcnt; ++k)
499  {
500  freq_t ps_aliwk = ps_aliw[k];
501  const uint8_t *alik = &__ali(k,0);
502 
503  #ifdef HAVE_OPENMP
504  #pragma omp parallel for num_threads(num_threads), schedule(static)
505  #endif
506  for(uint16_t i = 0; i < (__ali.alilen-2)/2+1; ++i)
507  {
508 #define _fill_pfi(__i) {\
509  pairfreq_t *pfi = &pairfreq(__i,0,0,0);\
510  for(uint16_t j = __i+1; j < __ali.alilen; ++j)\
511  {\
512  uint8_t ai = alik[__i];\
513  uint8_t aj = alik[j];\
514  if(ai < 21 && aj < 21)\
515  pfi[ j*pairfreq.d2 + ai*pairfreq.d3 + aj ] += ps_aliwk;\
516  } }
517  //
518  _fill_pfi(i);
519 
520  uint16_t ri = __ali.alilen-2-i;
521  if(ri!=i)
522  _fill_pfi(ri);
523 #undef _fill_pfi
524  }
525  }
526 
527  for(uint16_t i = 0; i < __ali.alilen-1; ++i)
528  for(uint16_t j = i+1; j < __ali.alilen; ++j)
529  {
530  pairfreq_t *pfij = &pairfreq(i,j,0,0), *pfji = &pairfreq(j,i,0,0); // lkajan: this is faster
531  for(uint8_t ai = 0; ai < 21; ++ai) // -funroll-loops to unroll such loops? vectorize?
532  for(uint8_t aj = 0; aj < 21; ++aj)
533  pfji[ aj*pairfreq.d3 + ai ] = pfij[ ai*pairfreq.d3 + aj ];
534  }
535 
536  for(uint16_t i = 0; i < __ali.alilen; ++i)
537  {
538  // zero out 21x21 matrix
539  memset(pairfreq.data() + i*pairfreq.d1 + i*pairfreq.d2, 0, 21*21*sizeof(pairfreq_t));
540  for(uint8_t aa = 0; aa < 21; ++aa)
541  pairfreq(i,i,aa,aa) = aafreq(i,aa);
542  }
543  }
544  { timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_before, &t_diff);
545  if(__timing) (*__timing)["pairfreq"] = t_diff.tv_sec+t_diff.tv_usec/1e6; if(dbg) fprintf(stderr, "calculated pair frequency table in %g secs\n", t_diff.tv_sec+t_diff.tv_usec/1e6 ); }
546 
547  // lkajan: Do MI_true calculation before changing aafreq and pairfreq:
548  map<string, ct_vector<fp_t> > raw_ctscore;
549  map<string, vector<double> > apc_bg;
550  map<string, double> apc_mean;
551  // EVfold-mfDCA MI_true
552  class _rawscore_MI_true_t : public _rawscore_calc_t {
553  const af_vector<freq_t> &_Pi_true;
554  const pf_vector &_Pij_true;
555  public:
556  _rawscore_MI_true_t(const af_vector<freq_t> &__Pi_true, const pf_vector &__Pij_true ) : _Pi_true(__Pi_true), _Pij_true(__Pij_true) {}
557  virtual double operator()(const uint16_t i, const uint16_t j) const
558  {
559  double raw_ij = 0;
560  for(uint8_t ai = 0; ai < 21; ++ai)
561  for(uint8_t aj = 0; aj < 21; ++aj)
562  if(_Pij_true(i,j,ai,aj) > 0)
563  raw_ij += _Pij_true(i,j,ai,aj) * log( _Pij_true(i,j,ai,aj) / _Pi_true(i,ai) / _Pi_true(j,aj) );
564  return raw_ij;
565  }
566  } _rawscore_MI_true(aafreq, pairfreq);
567  _raw_score_matrix(raw_ctscore, apc_bg, apc_mean, __ali.alilen, "MI", _rawscore_MI_true );
568  if(dbg) fprintf(stderr, "collected apc_mean[MI] = %16.15g\n", apc_mean["MI"]);
569 
570  // EVfold-mfDCA::with_pc() pseudocount weight function implementation
571  {
572  pf_vector Pij_true = pairfreq;
573  if( __pscnt_weight != 0.0 )
574  {
575  for(size_t i = 0; i < pairfreq.size(); ++i) pairfreq[i] = (1-__pscnt_weight)*pairfreq[i] + __pscnt_weight/21/21; // vectorize?
576  for(size_t i = 0; i < aafreq.size(); ++i) aafreq[i] = (1-__pscnt_weight)*aafreq[i] + __pscnt_weight/21; // vectorize?
577 
578  for(uint16_t i = 0; i < __ali.alilen; ++i)
579  for(uint8_t ai = 0; ai < 21; ++ai)
580  for(uint8_t aj = 0; aj < 21; ++aj)
581  pairfreq(i,i,ai,aj) = ai==aj ? (1-__pscnt_weight)*Pij_true(i,i,ai,aj) + __pscnt_weight/21 : 0;
582  }
583  }
584 
585  if(dbg)
586  {
587  // compute sum of pairfreq matrix
588  double aafs = 0; // aafreq sum
589  double pfs = 0;
590  for(uint16_t i = 0; i < __ali.alilen; ++i)
591  for(uint16_t j = 0; j < __ali.alilen; ++j)
592  for(uint8_t ai = 0; ai < 21; ++ai)
593  {
594  if(j==0) aafs += aafreq(i,ai);
595  for(uint8_t aj = 0; aj < 21; ++aj)
596  pfs += pairfreq(i,j,ai,aj);
597  }
598  fprintf(stderr, "aa freq sum (cell) = %.15g, pairfreq sum (cell) = %.15g\n", aafs/__ali.alilen, pfs/__ali.alilen/__ali.alilen);
599  }
600 
601  #ifdef RETURN_AFTER_PAIRFREQ
602  cerr << "returning because of RETURN_AFTER_PAIRFREQ " << __FILE__ << ":" << __LINE__ << "\n";
603  return map<string, vector<contact_t> >();
604  #endif
605 
606  // Forming the covariance matrix.
607  // lkajan: EVfold-mfDCA: covq = 20, the last letter ('Y') is simply left off. In this code '-' (gap) is left off.
608  // lkajan: Gaps and the actual inversion of the covariance matrix (as opposed to estimation with glasso):
609  // lkajan: * Leave ( aafreq(i,aamap_gapidx) > __gapth || aafreq(j,aamap_gapidx) > __gapth ) columns and rows entirely out of the cov matrix.
611  __apply_gapth ? __ali.alilen - gap_cols.size() : __ali.alilen,
612  __cov20 ? 20 : 21,
613  0.0
614  );
615 
616  uint16_t i = 0, covi = 0;
617  vector<uint16_t>::const_iterator gc_bi = ( __apply_gapth ? gap_cols.begin() : gap_cols.end() ), gc_ei = gap_cols.end();
618  for(; i < __ali.alilen; ++i)
619  {
620  if(gc_bi!=gc_ei && *gc_bi == i){ ++gc_bi; continue; } // too many gaps in this row
621 
622  uint16_t j = 0, covj = 0;
623  vector<uint16_t>::const_iterator gc_bj = ( __apply_gapth ? gap_cols.begin() : gap_cols.end() ), gc_ej = gap_cols.end();
624  for(; j < __ali.alilen; ++j)
625  {
626  if(gc_bj!=gc_ej && *gc_bj == j){ ++gc_bj; continue; } // too many gaps in this col
627 
628  for(uint8_t ai = 0; ai < covm.q; ++ai)
629  for(uint8_t aj = 0; aj < covm.q; ++aj)
630  covm(covi,covj,ai,aj) = pairfreq(i,j,ai,aj) - aafreq(i,ai) * aafreq(j,aj);
631  ++covj;
632  }
633  ++covi;
634  }
635  { pf_vector _empty; pairfreq.swap(_empty); } // lkajan: this trick releases memory, unlike clear()
636 
637  // lkajan: psicov shrinks covm in infinite loop without zeroing those cells out:
638  if(__estimate_ivcov) // lkajan: control it by matrix inversion/estimation method
639  for(uint16_t i = 0; i < covm.alilen; ++i)
640  for(uint8_t ai = 0; ai < covm.q; ++ai)
641  for(uint8_t aj = 0; aj < covm.q; ++aj)
642  if(ai != aj) covm(i,i,ai,aj) = 0;
643  if(dbg) cerr<<"formed covariance matrix ("<<covm.alilen<<"/"<<__ali.alilen<<","<<gap_cols.size()<<")\n";
644 
645  // Shrinking the sample covariance matrix for positive definite-ness, testing with LAPACK Cholesky factorization.
646  // lkajan: This speeds up glasso/glassofast, positive definite-ness is required (invertibility is not enough).
647  // lkajan: How long would a full matrix inversion take? That is pretty fast, actually, with LAPACK, see below.
648  if(__shrink_lambda>0)
649  {
650  gettimeofday(&t_before, NULL);
651 
652  double dmean = 0;
653  for(size_t i = 0; i < covm.d1; ++i) dmean += covm(i, i);
654  dmean /= covm.d1;
655  const double& lambda = __shrink_lambda; // lkajan: A constant lambda seems crude. See if a mathematician can think of a better solution.
656 
657  for (;;)
658  {
659  if(dbg) cerr << "cov matrix Cholesky...";
660 
661  int cholres;
662  {
663  vector<float> tempmat = covm;
664  int N = covm.d1;
665  spotrf_("L",&N,tempmat.data(),&N,&cholres);
666  if(cholres < 0) throw std::runtime_error(bo::str(bo::format("illegal value for argument %d") % -cholres ));
667  }
668  if(cholres==0) break;
669 
670  if(dbg) cerr << " not positive definite";
671 
672  for(uint16_t i = 0; i < covm.alilen; ++i)
673  for(uint16_t j = 0; j < covm.alilen; ++j)
674  for(uint8_t ai = 0; ai < covm.q; ++ai)
675  for(uint8_t aj = 0; aj < covm.q; ++aj)
676  if(i != j)
677  covm(i,j,ai,aj) *= (1.0 - lambda);
678  else if(ai == aj)
679  covm(i,j,ai,aj) = lambda*dmean + (1.0-lambda)*covm(i,j,ai,aj);
680 
681  if(dbg)
682  {
683  double dmean_dev = 0;
684  for(size_t i = 0; i < covm.d1; ++i) dmean_dev += pow(dmean - covm(i,i), 2);
685  dmean_dev /= covm.d1; dmean_dev = sqrt(dmean_dev);
686  fprintf(stderr, ", dmean dev: %g\n", dmean_dev );
687  }
688  }
689  { timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_before, &t_diff);
690  if(__timing) (*__timing)["shrink"] = t_diff.tv_sec+t_diff.tv_usec/1e6; if(dbg) fprintf(stderr, "\nshrank covariance matrix in %g secs\n", t_diff.tv_sec+t_diff.tv_usec/1e6 ); }
691  }
692 
693  #ifdef LIBFREEC_CHKPT
694  {
695  FILE *f = fopen("covm.dat", "w");
696  do {
697  if( fwrite(&covd, sizeof(covd), 1, f) != 1 ) { cerr << "error writing 'covm.dat': " << strerror(errno) << "\n"; fclose(f); unlink("covm.dat"); break; }
698  if( fwrite(&galilen, sizeof(galilen), 1, f) != 1 ) { cerr << "error writing 'covm.dat': " << strerror(errno) << "\n"; fclose(f); unlink("covm.dat"); break; }
699  if( fwrite( covm.data(), sizeof(*covm.data()), covm.size(), f ) != covm.size() )
700  {
701  cerr << "error writing 'covm.dat': " << strerror(errno) << "\n";
702  fclose(f); unlink("covm.dat");
703  break;
704  }
705  cerr << "wrote 'covm.dat'\n";
706  fclose(f);
707  } while(false);
708  }
709  #endif
710 
711  // Invert the covariance matrix, aiming for __dens precision matrix density.
712  fp_t rho = __rho;
713  fp_t rhofac = 0;
714  cov_vector<g_fp_t> wwi( covm.alilen, covm.q ); // inverse covariance matrix
715 
716  gettimeofday(&t_before, NULL);
717  if(__estimate_ivcov)
718  {
719  fp_t density = -1, prev_dens = -1;
720  do {
721  // Set new rho - the bigger rho, the smaller density gets
722  if( rho < 0 ) rho = std::max(0.001, 1.0 / __wtot);
723  else if(density >= 0)
724  {
725  if( density == 0 ) rhofac = 0.5;
726  else
727  {
728  if( rhofac != 0 && prev_dens > 0 && density > 0 )
729  rhofac = pow( rhofac, log( __dens / density ) / log( density / prev_dens ) );
730  else
731  rhofac = __dens > density ? 0.9 : 1.1; // lkajan: be more aggressive here? 0.5 : 2?
732  }
733  rho *= rhofac;
734  }
735 
736  int g_ia = 0; // exact solution
737  if(dbg) fprintf(stderr, "will try rho %g, %s solution\n", rho, (g_ia ? "approximate" : "exact"));
738 
739  if(rho <= 0 || rho >= 1) throw std::runtime_error(bo::str(bo::format("regularization parameter rho is out of expected range (0-1): %g") % rho ));
740 
741  cov_vector<g_fp_t> rho_m( covm.alilen, covm.q, rho );
742 
743  for(uint16_t i = 0; i < rho_m.alilen; ++i)
744  for(uint16_t j = 0; j < rho_m.alilen; ++j)
745  for(uint8_t ai = 0; ai < rho_m.q; ++ai)
746  for(uint8_t aj = 0; aj < rho_m.q; ++aj)
747  // Watch out, __ali.alilen may != galilen; __gapth filtering does not seem to make much sense with 0.5 __pscnt_weight.
748  if( (__ali.alilen==rho_m.alilen && ( aafreq(i,aamap_gapidx) > __gapth || aafreq(j,aamap_gapidx) > __gapth )) ||
749  (i == j && ai != aj )
750  )
751  rho_m(i,j,ai,aj) = 1e9;
752 
753  #ifdef LIBFREEC_CHKPT
754  {
755  FILE *f = fopen("rho_m.dat", "w");
756  do {
757  if( fwrite( rho_m.data(), sizeof(*rho_m.data()), rho_m.size(), f ) != rho_m.size() )
758  {
759  cerr << "error writing 'rho_m.dat': " << strerror(errno) << "\n";
760  fclose(f); unlink("rho_m.dat");
761  break;
762  }
763  cerr << "wrote 'rho_m.dat'\n";
764  fclose(f);
765  } while(false);
766  }
767  #endif
768 
769  // lkajan: estimate inverse covariance matrix
770  {
771  int g_n = covm.d1, g_is = 0, g_msg = dbg, g_maxit = 1e5, g_jerr, g_brk = 0;
772  int g_niter;
773  g_fp_t g_thr = 1e-4;
774  vector<g_fp_t> ww( g_n*g_n ); // estimated covariance matrix
775 
776  timeval t_before; if(dbg) gettimeofday(&t_before, NULL);
777 
778  {
779  // Set up a timer to break this process if it overruns some process cputime limit
780  _glasso_timer _gt(&g_brk, __icme_timeout, dbg);
781 
782  // No need to transpose for Fortran, matrix is symmetric.
783  {
784  vector<float> g_Wd(g_n), g_WXj(g_n);
785  glassofast_(&g_n, covm.data(), rho_m.data(), &g_thr, &g_maxit, &g_msg, &g_is, wwi.data(), ww.data(), g_Wd.data(), g_WXj.data(), &g_jerr, &g_brk);
786  switch (g_jerr) {
787  case 0:
788  break;
789  case 256:
790  throw icme_timeout_error(bo::str(bo::format("inverse covariance matrix estimation exceeded time limit (%d sec)") % __icme_timeout )); break;
791  default:
792  throw std::runtime_error("an error occurred in glassofast_");
793  }
794  g_niter = g_maxit;
795  }
796  }
797 
798  if(dbg){ timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_before, &t_diff);
799  fprintf(stderr, "%d iterations of glassofast took %g secs\n", g_niter, t_diff.tv_sec+t_diff.tv_usec/1e6 ); }
800  }
801 
802  if( __dens == 0 ) break; // Contact density target is not set, no need to calculate density.
803 
804  prev_dens = density;
805  {
806  size_t nz = 0;
807  for(size_t i = 0; i < wwi.d1; ++i)
808  for(size_t j = i+1; j < wwi.d1; ++j)
809  if( wwi(i, j) != 0 ) ++nz;
810 
811  density = (fp_t)nz / ( wwi.d1 * (wwi.d1-1) / 2 ); // triangle w/o diagonal
812  }
813  if(dbg) fprintf(stderr, "density = %7g, target sp = %g, |density-target sp|/target sp = %g\n", density, __dens, fabs(density - __dens)/__dens);
814  }
815  while( __dens == 0 || fabs(__dens - density)/__dens > 0.01 );
816  // lkajan: Is 0.01 strict enough? 1% of the 3% still means 17 contacts for a 340-long protein. But does that matter? Perhaps make 0.01 a parameter.
817  }
818  else
819  {
820  // lkajan: LAPACK matrix inversion: fast, but does not disregard gaps like glasso* do through high regularization, and no control over density.
821  wwi = covm; { cov_vector<cov_fp_t> _empty; covm.swap(_empty); } // lkajan: this trick releases memory, unlike clear()
822 
823  int N = wwi.d1, INFO;
824  int IPIV[N+1];
825 
826  timeval t_before; if(dbg) gettimeofday(&t_before, NULL);
827 
828  sgetrf_(&N,&N,wwi.data(),&N,IPIV,&INFO);
829  if(INFO) throw std::runtime_error(bo::str(bo::format("LU factorization error %d") % INFO ));
830  // lkajan: we could handle the singular matrix case if we wanted to, e.g. by increasing __pscnt_weight.
831 
832  if(dbg){ timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_before, &t_diff);
833  fprintf(stderr, "LU factorization took %g secs, ", t_diff.tv_sec+t_diff.tv_usec/1e6 ); }
834 
835  int LWORK = -1; vector<float> WORK(1);
836  sgetri_(&N,wwi.data(),&N,IPIV,WORK.data(),&LWORK,&INFO);
837  LWORK = N*N < WORK[0] ? N*N : WORK[0];
838  WORK.resize(LWORK);
839 
840  sgetri_(&N,wwi.data(),&N,IPIV,WORK.data(),&LWORK,&INFO);
841  if(INFO) throw std::runtime_error(bo::str(bo::format("matrix inversion error %d") % INFO ));
842 
843  if(dbg){ timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_before, &t_diff);
844  fprintf(stderr, "inverted matrix (incl LUf) in %g secs\n", t_diff.tv_sec+t_diff.tv_usec/1e6 ); }
845  }
846  { cov_vector<cov_fp_t> _empty; covm.swap(_empty); } // lkajan: this trick releases memory, unlike clear()
847  if(__timing){ timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_before, &t_diff);
848  (*__timing)["inv"] = t_diff.tv_sec+t_diff.tv_usec/1e6; }
849 
850  if(dbg)
851  {
852  size_t nz = 0;
853  double checksum = 0;
854  for(size_t i = 0; i < wwi.d1; ++i)
855  for(size_t j = i+1; j < wwi.d1; ++j)
856  if( wwi(i, j) != 0 ){ ++nz; checksum += wwi(i, j); }
857 
858  fp_t density = (fp_t)nz / wwi.d1 / (wwi.d1-1) * 2; // upper triangle w/o diagonal
859  fprintf(stderr, "density of inverse covariance matrix = %g (cksum %.7g)\n", density, checksum);
860  }
861 
862  // lkajan: Go back to __ali.alilen*covq x __ali.alilen*covq wwi matrix.
863  // lkajan: Well, we could go back to __ali.alilen*21 x __ali.alilen*21 even?
864  cov_vector<g_fp_t> wwiwg( __ali.alilen, wwi.q, 0 ); // wwi with gaps (if any)
865  if(wwi.alilen != wwiwg.alilen)
866  {
867  uint16_t i = 0, covi = 0;
868  vector<uint16_t>::const_iterator gc_bi = ( __apply_gapth ? gap_cols.begin() : gap_cols.end() ), gc_ei = gap_cols.end();
869  for(; i < __ali.alilen; ++i)
870  {
871  if(gc_bi!=gc_ei && *gc_bi == i){ ++gc_bi; continue; } // gapped row
872 
873  uint16_t j = 0, covj = 0;
874  vector<uint16_t>::const_iterator gc_bj = ( __apply_gapth ? gap_cols.begin() : gap_cols.end() ), gc_ej = gap_cols.end();
875  for(; j < __ali.alilen; ++j)
876  {
877  if(gc_bj!=gc_ej && *gc_bj == j){ ++gc_bj; continue; } // gapped col
878 
879  for(uint8_t ai = 0; ai < wwiwg.q; ++ai)
880  for(uint8_t aj = 0; aj < wwiwg.q; ++aj)
881  wwiwg(i,j,ai,aj) = wwi(covi,covj,ai,aj);
882  ++covj;
883  }
884  ++covi;
885  }
886  if(dbg) cerr<<"went back to gapped ("<<wwiwg.alilen<<") wwi matrix\n";
887  }
888  else
889  wwiwg.swap(wwi);
890  { cov_vector<g_fp_t> _empty; wwi.swap(_empty); } // lkajan: this trick releases memory, unlike clear()
891 
892  // lkajan: Idea: how about doing APC over the scores of contacts beyond __mincontsep, instead of 1 (excluding diagonal only)?
893  // PSICOV raw scores and background for average product correction (APC) [Dunn et al. 2008]
894  // l1-norm (PSICOV) raw scores
895  class _rawscore_l1norm_t : public _rawscore_calc_t {
896  const cov_vector<g_fp_t> &_wwiwg;
897  public:
898  _rawscore_l1norm_t(const cov_vector<g_fp_t> &__wwiwg ) : _wwiwg(__wwiwg) {}
899  virtual double operator()(const uint16_t i, const uint16_t j) const
900  {
901  double raw_ij = 0;
902  for(uint8_t ai = 0; ai < 20; ++ai) // disregard gap (index 20) - works also if __cov20 is in effect
903  for(uint8_t aj = 0; aj < 20; ++aj)
904  raw_ij += fabs(_wwiwg(i,j,ai,aj));
905  return raw_ij;
906  }
907  } _rawscore_l1norm(wwiwg);
908  _raw_score_matrix(raw_ctscore, apc_bg, apc_mean, __ali.alilen, "l1norm", _rawscore_l1norm );
909  if(dbg) fprintf(stderr, "collected apc_mean[l1norm] = %16.15g\n", apc_mean["l1norm"]);
910 
911  // Frobenius norm
912  // EVfold-mfDCA::calculate_cn_scores() up to APC bit: obtain EVfold-mfDCA::fn_scores - courtesy of Thomas Hopf
913  // Quoting Thomas: transform to zero-sum gauge according to Ekeberg et al.
914  // Quoting Thomas: calculate Frobenius norm for each pair of columns i, j
915  class _rawscore_fro_t : public _rawscore_calc_t {
916  const cov_vector<g_fp_t> &_wwiwg;
917  public:
918  _rawscore_fro_t(const cov_vector<g_fp_t> &__wwiwg) : _wwiwg(__wwiwg) {}
919  virtual double operator()(const uint16_t i, const uint16_t j) const
920  {
921  vector<float> W_mf( 21*21, 0.0 );
922  vector<double> rmean_W_mf( 21, 0.0 ); // row mean W_mf
923  vector<double> cmean_W_mf( 21, 0.0 ); // col mean W_mf
924  double mean2_W_mf = 0;
925  for(uint8_t ai = 0; ai < 20; ++ai) // lkajan: look out, 20, not covq!
926  for(uint8_t aj = 0; aj < 20; ++aj)
927  {
928  const double cell = ( W_mf[ ai*21+aj ] = -_wwiwg(i,j,ai,aj) );
929  mean2_W_mf += cell;
930  rmean_W_mf[ai] += cell/21;
931  cmean_W_mf[aj] += cell/21;
932  }
933  mean2_W_mf /= W_mf.size();
934 
935  // transform to zero-sum gauge according to Ekeberg et al.
936  double sum_sum_W_mf_zsg2 = 0.0;
937  for(uint8_t ai = 0; ai < 21; ++ai)
938  for(uint8_t aj = 0; aj < 21; ++aj)
939  {
940  double W_mf_zsg = (double)W_mf[ ai*21+aj ] - rmean_W_mf[ai] - cmean_W_mf[aj] + mean2_W_mf;
941  sum_sum_W_mf_zsg2 += W_mf_zsg*W_mf_zsg;
942  }
943 
944  // calculate Frobenius norm for each pair of columns i, j
945  double raw_ij = sqrt(sum_sum_W_mf_zsg2);
946 
947  return raw_ij;
948  }
949  } _rawscore_fro(wwiwg);
950  _raw_score_matrix(raw_ctscore, apc_bg, apc_mean, __ali.alilen, "fro", _rawscore_fro );
951  if(dbg) fprintf(stderr, "collected apc_mean[fro] = %16.15g\n", apc_mean["fro"]);
952 
953  { cov_vector<g_fp_t> _empty; wwiwg.swap(_empty); } // lkajan: this trick releases memory, unlike clear()
954 
955  // Calculate final scores with average product correction (APC, Dunn et al., 2008)
956  map<string, vector<contact_t> > res;
957  // l1norm
958  res["l1norm"] = _apc( raw_ctscore["l1norm"], apc_bg["l1norm"], apc_mean["l1norm"], __mincontsep, true );
959  // MI_true - report raw values, not apc
960  res["MI"] = _raw_as_is( raw_ctscore["MI"], __mincontsep );
961  // fro
962  res["fro"] = _apc( raw_ctscore["fro"], apc_bg["fro"], apc_mean["fro"], __mincontsep, false );
963  //
964  //{ timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_run, &t_diff);
965  // if(__timing) (*__timing)["run"] = t_diff.tv_sec+t_diff.tv_usec/1e6; if(dbg) fprintf(stderr, "run done in %g secs\n", t_diff.tv_sec+t_diff.tv_usec/1e6); }
966  return res;
967 }
968 
969 
971  predictor::run(const ali_t& __ali, double __clustpc,
972  double __density, double __gapth, uint16_t __mincontsep,
973  double __pseudocnt, double __pscnt_weight, bool __estimate_ivcov, double __shrink_lambda,
974  bool __cov20, bool __apply_gapth, double __rho,
975  bool __veczw, int __num_threads, time_t __icme_timeout, time_res_t *__timing
976  ) throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error)
977 {
978  timeval t_top; gettimeofday(&t_top, NULL);
979  timeval t_before; gettimeofday(&t_before, NULL);
980 
981  freq_vec_t aliw; double wtot;
982  get_seq_weights(aliw, wtot, __ali, __clustpc, __veczw, __num_threads);
983 
984  { timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_before, &t_diff);
985  if(__timing) (*__timing)["seqw"] = t_diff.tv_sec+t_diff.tv_usec/1e6;
986  if(dbg) fprintf(stderr, "\nseq weight loop for %d seqs took %g secs\n", __ali.seqcnt, t_diff.tv_sec+t_diff.tv_usec/1e6 ); }
987 
988  cont_res_t ret = run(__ali, aliw, wtot,
989  __density, __gapth, __mincontsep,
990  __pseudocnt, __pscnt_weight, __estimate_ivcov, __shrink_lambda,
991  __cov20, __apply_gapth, __rho,
992  __num_threads, __icme_timeout, __timing);
993 
994  { timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_top, &t_diff);
995  if(__timing) (*__timing)["all"] = t_diff.tv_sec+t_diff.tv_usec/1e6; if(dbg) fprintf(stderr, "all done in %g secs\n", t_diff.tv_sec+t_diff.tv_usec/1e6); }
996 
997  return ret;
998 }
999 
1000 
1001 ali_t& ali_t::push(const std::vector<uint8_t>& __al) throw (alilen_error)
1002 {
1003  if( __al.size() != alilen ) throw alilen_error(bo::str(bo::format("alignment length mismatch, expected %d, got %d") % alilen % __al.size() ));
1004  resize( ++seqcnt*alilen16, _mm_setzero_si128() );
1005 
1006  uint8_t *alptr = reinterpret_cast<uint8_t*>(data()) + (seqcnt-1) * alilenpad;
1007  memcpy(alptr, __al.data(), __al.size());
1008 
1009  if( capacity() == size() )
1010  reserve( size() + 1024*alilen16 );
1011  return *this;
1012 }
1013 
1014 
1016 
1023 void _raw_score_matrix(map<string, ct_vector<fp_t> > &__raw_ctscore, map<string, vector<double> > &__apc_bg, map<string, double> &__apc_mean,
1024  const uint16_t __alilen, const string &__key, const _rawscore_calc_t &__fo )
1025 {
1026  ct_vector<fp_t>& raw_cts = __raw_ctscore[__key] = ct_vector<fp_t>( __alilen );
1027  vector<double>& apc_bg = __apc_bg[__key] = vector<double>( __alilen );
1028  double& mean = __apc_mean[__key] = 0;
1029 
1030  for(uint16_t i = 0; i < __alilen; ++i)
1031  for(uint16_t j = i+1; j < __alilen; ++j)
1032  {
1033  double raw_ij = __fo(i, j);
1034 
1035  raw_cts[ i*__alilen + j ] = raw_cts[ j*__alilen + i ] = raw_ij;
1036  mean += raw_ij;
1037 
1038  double bg_ij = raw_ij / (__alilen-1);
1039  apc_bg[i] += bg_ij;
1040  apc_bg[j] += bg_ij;
1041  }
1042  mean /= __alilen * (__alilen-1) / 2;
1043 }
1044 
1045 vector<contact_t> _apc( const ct_vector<fp_t>& __raw_ctscore, const vector<double>& __apc_bg, const double __apc_mean, const uint16_t __mincontsep, bool __filt )
1046 {
1047  const uint16_t alilen = __apc_bg.size();
1048 
1049  // we expect that __raw_ctscore is a alilen*alilen matrix
1050  vector<contact_t> res;
1051  res.reserve( alilen*(alilen-1)/2*0.03 ); // reserve space for 3% contacts
1052 
1053  for(int i = 0, e = alilen-__mincontsep; i < e; ++i)
1054  for(uint16_t j = i+__mincontsep; j < alilen; ++j)
1055  if(!__filt || __raw_ctscore(i,j) > 0 ) // lkajan: psicov has this - it does strip off a few predictions.
1056  res.push_back(
1057  contact_t( i, j,
1058  __raw_ctscore(i,j) - __apc_bg[i] * __apc_bg[j] / __apc_mean
1059  )
1060  );
1061  return res;
1062 }
1063 
1064 vector<contact_t> _raw_as_is( const ct_vector<fp_t>& __raw_ctscore, const uint16_t __mincontsep )
1065 {
1066  const uint16_t alilen = __raw_ctscore.alilen;
1067 
1068  vector<contact_t> res;
1069  res.reserve( alilen*(alilen-1)/2 );
1070 
1071  for(int i = 0, e = alilen-__mincontsep; i < e; ++i)
1072  for(uint16_t j = i+__mincontsep; j < alilen; ++j)
1073  res.push_back(
1074  contact_t( i, j,
1075  __raw_ctscore(i,j)
1076  )
1077  );
1078  return res;
1079 }
1080 
1081 static inline uint32_t
1082  _cache_holds_nseq(uint16_t __seqsize)
1083 {
1084  long l1_dcache_size = sysconf(_SC_LEVEL1_DCACHE_SIZE);
1085  //long l1_dcache_linesize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
1086  long l2_cache_size = sysconf(_SC_LEVEL2_CACHE_SIZE);
1087  //long l2_cache_linesize = sysconf(_SC_LEVEL2_CACHE_LINESIZE);
1088  long l3_cache_size = sysconf(_SC_LEVEL3_CACHE_SIZE);
1089  //long l3_cache_linesize = sysconf(_SC_LEVEL3_CACHE_LINESIZE);
1090 
1091  uint32_t wchunk = 0;
1092 
1093  if(l1_dcache_size > 0) wchunk = 0.5*l1_dcache_size / __seqsize;
1094  if(wchunk < 4 && l2_cache_size > 0) wchunk = 0.5*l2_cache_size / __seqsize;
1095  if(wchunk < 4 && l3_cache_size > 0) wchunk = 0.5*l3_cache_size / __seqsize;
1096  if(wchunk < 4){ wchunk = 0.5*1048576 / __seqsize; // assume there is a 1MB cache somewhere, even if we can not detect it
1097  if(wchunk < 4) wchunk = 4;
1098  }
1099 
1100  // jobtest2:
1101  // 100000 => 86s
1102  //wchunk = 100000
1103  // L3 cache enough for 49152
1104  // 20000 => 50s
1105  //wchunk = 20000
1106  // L2 cache enough for 4096
1107  // 2000 => 42s
1108  //wchunk = 2000
1109  // L1 cache enough for 512
1110  // 200 => 42s
1111  //wchunk = 200
1112  // the below is the fastest so far, 50 is slower
1113  // 100 => 40.5s
1114  //wchunk = 100
1115 
1116  // emak:
1117  // 100000 => 67s
1118  //wchunk = 100000
1119  // L2 cache enough for 24576
1120  // 10000 => 36s
1121  //wchunk = 10000
1122  // L1 cache enough for 256
1123  // 100 => 34.6s
1124  //wchunk = 100
1125  // the below is the fastest so far, 25 is slower
1126  // 50 => 34.4s
1127  //wchunk = 50
1128 
1129  return wchunk;
1130 }
1131 
1132 } // namespace freecontact
1133 
1134 // vim:et:ts=4:ai:sw=4:
_glasso_timer(int *__sival_ptr, time_t __tv_sec, bool __dbg=false)
Calculates raw score for an i,j contact.
virtual double operator()(const uint16_t i, const uint16_t j) const =0
const _Tp & operator()(uint16_t __i, uint8_t __ai) const
Definition: freecontact.cpp:92
af_vector(uint16_t __alilen, uint8_t __q, _Tp __v)
Definition: freecontact.cpp:86
_Tp & operator()(uint16_t __i, uint8_t __ai)
Definition: freecontact.cpp:88
Protein sequence alignment.
Definition: freecontact.h:70
Alignment length exception.
Definition: freecontact.h:53
_Tp & operator()(size_t __row, size_t __col)
const _Tp & operator()(size_t __row, size_t __col) const
const _Tp & operator()(uint16_t __i, uint16_t __j, uint8_t __ai, uint8_t __aj) const
cov_vector(uint16_t __alilen, uint8_t __q, _Tp __v)
_Tp & operator()(uint16_t __i, uint16_t __j, uint8_t __ai, uint8_t __aj)
cov_vector(uint16_t __alilen, uint8_t __q)
ct_vector(uint16_t __alilen=0)
Definition: freecontact.cpp:66
_Tp & operator()(uint16_t __i, uint16_t __j)
Definition: freecontact.cpp:69
const _Tp & operator()(uint16_t __i, uint16_t __j) const
Definition: freecontact.cpp:74
2-dimensional matrix.
_Tp & operator()(_Tri __r, _Tci __c)
const _Tp & operator()(_Tri __r, _Tci __c) const
d2matrix(_Tri __rows, _Tci __cols)
Inverse covariance matrix estimation timeout exception.
Definition: freecontact.h:59
_Tp & operator()(uint16_t __i, uint16_t __j, uint8_t __ai, uint8_t __aj)
pf_vector(uint16_t __alilen, uint8_t __q, _Tp __v)
const _Tp & operator()(uint16_t __i, uint16_t __j, uint8_t __ai, uint8_t __aj) const
std::map< std::string, std::vector< contact_t > > cont_res_t
Definition: freecontact.h:162
std::map< std::string, double > time_res_t
Definition: freecontact.h:166
std::vector< freq_t > freq_vec_t
Definition: freecontact.h:165
#define HAVE_OPENMP
Definition: config.h:42
#define PACKAGE
Definition: config.h:96
#define CAN_VECW
void spotrf_(const char *UPLO, const int *N, float *A, const int *LDA, int *INFO)
void glassofast_(int *, g_fp_t *, g_fp_t *, g_fp_t *, int *, int *, int *, g_fp_t *, g_fp_t *, g_fp_t *, g_fp_t *, int *, int *)
void sgetri_(int *N, float *A, int *lda, int *IPIV, float *WORK, int *LWORK, int *INFO)
#define _fill_pfi(__i)
float g_fp_t
Definition: freecontact.cpp:38
void sgetrf_(int *M, int *N, float *A, int *LDA, int *IPIV, int *INFO)
static __m128i _mm_setzero_si128()
static void _raw_score_matrix(map< string, ct_vector< fp_t > > &__raw_ctscore, map< string, vector< double > > &__apc_bg, map< string, double > &__apc_mean, const uint16_t __alilen, const string &__key, const _rawscore_calc_t &__fo)
Calculate raw contact scores using given function and collect row/column/overall mean for APC calcula...
static const uint8_t aamap_gapidx
Definition: freecontact.h:38
d2matrix< uint32_t, uint32_t, int > simcnt_t
static vector< contact_t > _apc(const ct_vector< fp_t > &__raw_ctscore, const vector< double > &__apc_bg, const double __apc_mean, const uint16_t __mincontsep, bool __filt)
static uint32_t _cache_holds_nseq(uint16_t __seqsize)
static vector< contact_t > _raw_as_is(const ct_vector< fp_t > &__raw_ctscore, const uint16_t __mincontsep)
float cov_fp_t
Definition: freecontact.cpp:58
Contact prediction.
Definition: freecontact.h:124