VinaLC: Parallel Molecular Docking Program

Biochemical and Biophysical Systems Group
VinaLC version: 1.1.2

statistics.h
Go to the documentation of this file.
1 /*
2 
3  Copyright (c) 2006-2010, The Scripps Research Institute
4 
5  Licensed under the Apache License, Version 2.0 (the "License");
6  you may not use this file except in compliance with the License.
7  You may obtain a copy of the License at
8 
9  http://www.apache.org/licenses/LICENSE-2.0
10 
11  Unless required by applicable law or agreed to in writing, software
12  distributed under the License is distributed on an "AS IS" BASIS,
13  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  See the License for the specific language governing permissions and
15  limitations under the License.
16 
17  Author: Dr. Oleg Trott <ot14@columbia.edu>,
18  The Olson Lab,
19  The Scripps Research Institute
20 
21 */
22 
23 #ifndef VINA_STATISTICS_H
24 #define VINA_STATISTICS_H
25 
26 #include <algorithm> // sort
27 #include <cmath> // sqrt
28 #include "common.h" // for flv
29 
30 // simple but inefficient implementations
31 
32 inline fl mean(const flv& v) {
33  fl acc = 0;
34  VINA_FOR_IN(i, v)
35  acc += v[i];
36  return v.empty() ? 0 : (acc/v.size());
37 }
38 
39 inline fl deviation(const flv& v) {
40  fl m = mean(v);
41  fl acc = 0;
42  VINA_FOR_IN(i, v)
43  acc += sqr(v[i] - m);
44  return v.empty() ? 0 : std::sqrt(acc/v.size());
45 }
46 
47 inline fl rmsd(const flv& a, const flv& b) {
48  VINA_CHECK(a.size() == b.size());
49  fl acc = 0;
50  VINA_FOR_IN(i, a)
51  acc += sqr(a[i] - b[i]);
52  return a.empty() ? 0 : std::sqrt(acc/a.size());
53 }
54 
55 inline fl average_difference(const flv& b, const flv& a) { // b - a
56  VINA_CHECK(a.size() == b.size());
57  fl acc = 0;
58  VINA_FOR_IN(i, a)
59  acc += b[i] - a[i];
60  return a.empty() ? 0 : (acc/a.size());
61 }
62 
63 inline fl pearson(const flv& x, const flv& y) {
64  sz n = x.size();
65  VINA_CHECK(n == y.size());
66  if(n == 0) return 0; // ?
67  fl sum_x = 0;
68  fl sum_y = 0;
69  fl sum_x_sq = 0;
70  fl sum_y_sq = 0;
71  fl sum_prod = 0;
72 
73  VINA_FOR(i, n) {
74  sum_x += x[i];
75  sum_y += y[i];
76  sum_x_sq += sqr(x[i]);
77  sum_y_sq += sqr(y[i]);
78  sum_prod += x[i] * y[i];
79  }
80  fl sd_x = std::sqrt(sum_x_sq/n - sqr(sum_x/n)); // FIXME the argument is supposed to be >= 0, but ...
81  fl sd_y = std::sqrt(sum_y_sq/n - sqr(sum_y/n));
82  fl cov = sum_prod/n - (sum_x/n) * (sum_y/n);
83  fl tmp = sd_x * sd_y;
84  if(std::abs(tmp) < epsilon_fl) return 0; // ?
85  return cov / tmp;
86 }
87 
88 struct spearman_aux {
89  fl x;
90  sz i;
91  spearman_aux(fl x, sz i) : x(x), i(i) {}
92 };
93 
94 inline bool operator<(const spearman_aux& a, const spearman_aux& b) {
95  return a.x < b.x;
96 }
97 
98 inline flv get_rankings(const flv& x) {
99  std::vector<spearman_aux> to_sort;
100  VINA_FOR_IN(i, x)
101  to_sort.push_back(spearman_aux(x[i], i));
102  std::sort(to_sort.begin(), to_sort.end());
103 
104  flv tmp(x.size(), 0);
105  VINA_FOR_IN(rank, to_sort)
106  tmp[to_sort[rank].i] = rank;
107  return tmp;
108 }
109 
110 inline fl spearman(const flv& x, const flv& y) {
111  return pearson(get_rankings(x), get_rankings(y));
112 }
113 
114 #endif