meerkat is hosted by Hepforge, IPPP Durham
Meerkat  v1r3
Multidimensional kernel density estimation package
BinnedDensity.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <vector>
3 #include <stdlib.h>
4 
5 #include "Compression.h"
6 #include "TMath.h"
7 #include "TFile.h"
8 #include "TTree.h"
9 #include "TROOT.h"
10 #include "TRandom3.h"
11 
12 #include "AbsPhaseSpace.hh"
13 #include "AbsDensity.hh"
14 #include "BinnedDensity.hh"
15 
16 #include "Logger.hh"
17 
18 #define MAX_VECTOR_SIZE 20000000
19 
21 BinnedDensity::BinnedDensity(const char* pdfName,
22  AbsPhaseSpace* thePhaseSpace,
23  AbsDensity* d,
24  UInt_t bins1,
25  UInt_t bins2,
26  UInt_t bins3,
27  UInt_t bins4,
28  UInt_t bins5) : AbsDensity(pdfName) {
29  std::vector<UInt_t> bins;
30  bins.push_back( bins1 );
31  if (bins2 > 0) bins.push_back( bins2 );
32  if (bins3 > 0) bins.push_back( bins3 );
33  if (bins4 > 0) bins.push_back( bins4 );
34  if (bins5 > 0) bins.push_back( bins5 );
35 
36  init(thePhaseSpace, bins, d);
37 }
38 
40 BinnedDensity::BinnedDensity(const char* pdfName,
41  AbsPhaseSpace* thePhaseSpace,
42  std::vector<UInt_t> &binning,
43  AbsDensity* d) : AbsDensity(pdfName) {
44  init(thePhaseSpace, binning, d);
45 }
46 
48 void BinnedDensity::init(AbsPhaseSpace* thePhaseSpace,
49  std::vector<UInt_t> &binning,
50  AbsDensity* d) {
51 
52  m_phaseSpace = thePhaseSpace;
53  m_binning = binning;
54  m_density = d;
55  UInt_t dim = m_phaseSpace->dimensionality();
56 
57  Logger::print(0,"%20.20s INFO: Creating binned density over %dD phase space from density \"%s\"\n", m_name, dim, d->name() );
58 
59  if (m_binning.size() != m_phaseSpace->dimensionality()) {
60  Logger::print(2,"%20.20s ERROR: Dimensionality of phase space (%d) does not match binning vector size (%d)\n",
61  m_name, m_phaseSpace->dimensionality(), (UInt_t)m_binning.size());
62  abort();
63  }
64 
65  UInt_t size = 1;
66  std::vector<UInt_t>::iterator i;
67  for (i=m_binning.begin(); i!=m_binning.end(); i++) {
68  size *= (*i);
69  }
70 
71  Logger::print(0,"%20.20s INFO: Map size=%d\n", m_name, size);
72  if (size > MAX_VECTOR_SIZE) {
73  Logger::print(2,"%20.20s ERROR: Map size (%d) too large!\n", m_name, size);
74  abort();
75  }
76 
77  m_map.resize(size);
78 
79  std::vector<Double_t> x(dim);
80  std::vector<UInt_t> iter(dim);
81  Int_t j;
82  for (j=0; j<(Int_t)dim; j++) {
83  iter[j] = 0;
84  }
85 
86  Double_t phspSum = 0.;
87  UInt_t phspNum = 0;
88 
90 
91  do {
92  UInt_t index = 0;
93  for (j=dim-1; j>=0; j--) {
94  Double_t low = m_phaseSpace->lowerLimit(j);
95  Double_t up = m_phaseSpace->upperLimit(j);
96  x[j] = low + (Double_t)iter[j]/((Double_t)m_binning[j]-1)*(up-low);
97  if (j==(Int_t)dim-1) {
98  index = iter[j];
99  } else {
100  index = index*m_binning[j] + iter[j];
101  }
102 // if (iter[0] == 0)
103 // Logger::print(0," Dim%d, bin%d, x=%f\n", j, iter[j], x[j]);
104  }
105 
106  Double_t e = m_density->density(x);
107  if (e>m_cutoff) e = m_cutoff;
108 
109  if (m_phaseSpace->withinLimits(x)) {
110  phspSum += e;
111  phspNum++;
112  }
113 
114  if ((index % 100) == 0 && Logger::timer(2))
115  Logger::print(0,"%20.20s INFO: Index %d, density=%f\n", m_name, index, e);
116 
117  if (index >= size) {
118  Logger::print(2,"%20.20s ERROR: index (%d) is larger than array size (%d)\n", m_name, index, size);
119  abort();
120  } else {
121  m_map[index] = e;
122  }
123 
124  Bool_t run = 0;
125  for (j=0; j<(Int_t)dim; j++) {
126  if (iter[j] < m_binning[j]-1) {
127  iter[j]++;
128  run = 1;
129  break;
130  } else {
131  iter[j] = 0;
132  }
133  }
134  if (!run) break;
135 
136 
137  } while(1);
138 
139  // Normalize map such that its average equals to 1.
140  phspSum /= (Double_t)phspNum;
141  for (j = 0; j<(Int_t)size; j++) m_map[j] /= phspSum;
142 }
143 
145 BinnedDensity::BinnedDensity(const char* pdfName, AbsPhaseSpace* thePhaseSpace, const char* filename, Double_t cutoff) : AbsDensity(pdfName) {
146  m_phaseSpace = thePhaseSpace;
147  m_density = 0;
148  m_cutoff = cutoff;
149 
150  readFromFile(filename);
151 }
152 
153 void BinnedDensity::readFromFile(const char* filename) {
154  if (TString(filename).EndsWith(".root") ) {
155  readFromRootFile(filename);
156  } else {
157  readFromTextFile(filename);
158  }
159 }
160 
162 void BinnedDensity::readFromTextFile(const char* filename) {
163 
164  FILE* file = fopen(filename, "r");
165  if (!file) {
166  Logger::print(2,"%20.20s ERROR: binned density file \"%s\" not found\n", m_name, filename );
167  abort();
168  }
169 
170  UInt_t dim;
171  if (fscanf(file, "%d", &dim) != 1) {
172  Logger::print(2,"%20.20s ERROR: Phase space dimensionality not found in file \"%s\"\n", m_name, filename);
173  abort();
174  }
175 
176  Logger::print(0,"%20.20s INFO: Reading binned density over %dD phase space from text file \"%s\"\n", m_name, dim, filename );
177 
178  if (dim != m_phaseSpace->dimensionality()) {
179  Logger::print(2,"%20.20s ERROR: Dimensionality of phase space (%d) does not match binning vector size (%d)\n",
181  abort();
182  }
183 
184  Int_t j;
185  m_binning.resize(dim);
186  for (j=0; j<(Int_t)dim; j++) {
187  int nbins;
188  if (fscanf(file, "%d", &nbins) != 1) {
189  Logger::print(2,"%20.20s ERROR: Number of bins not found in file \"%s\"\n", m_name, filename);
190  abort();
191  }
192  m_binning[j] = nbins;
193  }
194 
195  UInt_t size = 1;
196  std::vector<UInt_t>::iterator i;
197  for (i=m_binning.begin(); i!=m_binning.end(); i++) {
198  size *= (*i);
199  }
200 
201  Logger::print(0,"%20.20s INFO: Map size=%d\n", m_name, size);
202  if (size > MAX_VECTOR_SIZE) {
203  Logger::print(2,"%20.20s ERROR: Map size too large!\n", m_name);
204  abort();
205  }
206 
207  m_map.resize(size);
208 
209  // Zero iterator vector
210  std::vector<Double_t> x(dim);
211  std::vector<UInt_t> iter(dim);
212  for (j=0; j<(Int_t)dim; j++) {
213  iter[j] = 0;
214  }
215 
216  // Loop through the bins
217 
218  Logger::setTimer();
219  do {
220 
221  UInt_t index = 0;
222  for (j=dim-1; j>=0; j--) {
223  Double_t low = m_phaseSpace->lowerLimit(j);
224  Double_t up = m_phaseSpace->upperLimit(j);
225  x[j] = low + (Double_t)iter[j]/((Double_t)m_binning[j]-1)*(up-low);
226  if (j==(Int_t)dim-1) {
227  index = iter[j];
228  } else {
229  index = index*m_binning[j] + iter[j];
230  }
231 // if (iter[0] == 0)
232 // Logger::print(0," Dim%d, bin%d, x=%f\n", j, iter[j], x[j]);
233  }
234 
235  Double_t e;
236  Int_t dummy;
237  if (fscanf(file, "%lf %d", &e, &dummy) != 2) {
238  Logger::print(2,"%20.20s ERROR: Error reading map from file \"%s\", index %d\n", m_name, filename, index);
239  abort();
240  }
241  if (e>m_cutoff) e = m_cutoff;
242 
243  if ((index % 100) == 0 && Logger::timer(2)) Logger::print(0,"%20.20s INFO: Index %d, density=%f\n", m_name, index, e);
244 
245  if (index >= size) {
246  Logger::print(2,"%20.20s ERROR: index (%d) is larger than array size (%d)\n", m_name, index, size);
247  abort();
248  } else {
249  m_map[index] = e;
250  }
251 
252  Bool_t run = 0;
253  for (j=0; j<(Int_t)dim; j++) {
254  if (iter[j] < m_binning[j]-1) {
255  iter[j]++;
256  run = 1;
257  break;
258  } else {
259  iter[j] = 0;
260  }
261  }
262  if (!run) break;
263 
264  } while(1);
265 
266  fclose(file);
267 }
268 
270 void BinnedDensity::readFromRootFile(const char* filename) {
271 
272  TFile* file = TFile::Open(filename);
273  if (!file) {
274  Logger::print(2,"%20.20s ERROR: binned density file \"%s\" not found\n", m_name, filename );
275  abort();
276  }
277 
278  TTree* dimTree;
279  TTree* mapTree;
280 
281  dimTree = (TTree*)gROOT->FindObject("DimTree");
282  mapTree = (TTree*)gROOT->FindObject("MapTree");
283 
284  if (!dimTree) {
285  Logger::print(2,"%20.20s ERROR: DimTree not found in file \"%s\"\n", m_name, filename);
286  abort();
287  }
288  if (!mapTree) {
289  Logger::print(2,"%20.20s ERROR: MapTree not found in file \"%s\"\n", m_name, filename);
290  abort();
291  }
292 
293  UInt_t dim = dimTree->GetEntries();
294 
295  Logger::print(0,"%20.20s INFO: Reading binned density over %dD phase space from ROOT file \"%s\"\n", m_name, dim, filename );
296 
297  if (dim != m_phaseSpace->dimensionality()) {
298  Logger::print(2,"%20.20s ERROR: Dimensionality of phase space (%d) does not match binning vector size (%d)\n",
299  m_name, (int)m_phaseSpace->dimensionality(), dim);
300  abort();
301  }
302 
303  Int_t j;
304  m_binning.resize(dim);
305  Int_t nbins;
306  dimTree->SetBranchAddress("bins", &nbins);
307  for (j=0; j<(Int_t)dim; j++) {
308  dimTree->GetEvent(j);
309  m_binning[j] = nbins;
310  }
311 
312  UInt_t size = 1;
313  std::vector<UInt_t>::iterator i;
314  for (i=m_binning.begin(); i!=m_binning.end(); i++) {
315  size *= (*i);
316  }
317 
318  Logger::print(0,"%20.20s INFO: Map size=%d\n", m_name, size);
319  if (size > MAX_VECTOR_SIZE) {
320  Logger::print(2,"%20.20s ERROR: Map size too large!\n", m_name);
321  abort();
322  }
323 
324  m_map.resize(size);
325 
326  // Zero iterator vector
327  std::vector<Double_t> x(dim);
328  std::vector<UInt_t> iter(dim);
329  for (j=0; j<(Int_t)dim; j++) {
330  iter[j] = 0;
331  }
332 
333  // Loop through the bins
334  Float_t e;
335  Char_t inphsp;
336  mapTree->SetBranchAddress("dens", &e);
337  mapTree->SetBranchAddress("inphsp", &inphsp);
338  if (mapTree->GetEntries() != size) {
339  Logger::print(2,"%20.20s ERROR: Map size (%d) does not match number of entries in MapTree (%d)!\n", m_name,
340  size, (int)mapTree->GetEntries());
341  abort();
342  }
343 
344  Logger::setTimer();
345  do {
346 
347  UInt_t index = 0;
348  for (j=dim-1; j>=0; j--) {
349  Double_t low = m_phaseSpace->lowerLimit(j);
350  Double_t up = m_phaseSpace->upperLimit(j);
351  x[j] = low + (Double_t)iter[j]/((Double_t)m_binning[j]-1)*(up-low);
352  if (j==(Int_t)dim-1) {
353  index = iter[j];
354  } else {
355  index = index*m_binning[j] + iter[j];
356  }
357 // if (iter[0] == 0)
358 // Logger::print(0," Dim%d, bin%d, x=%f\n", j, iter[j], x[j]);
359  }
360 
361 // if (fscanf(file, "%lf %d", &e, &dummy) != 2) {
362 // Logger::print(0,"%20.20s ERROR: Error reading map from file \"%s\", index %d\n", m_name, filename, index);
363 // abort();
364 // }
365  if ((index % 100) == 0 && Logger::timer(2)) Logger::print(0,"%20.20s INFO: Index %d, density=%f\n", m_name, index, e);
366 
367  if (index >= size) {
368  Logger::print(0,"%20.20s ERROR: index (%d) is larger than array size (%d)\n", m_name, index, size);
369  abort();
370  } else {
371  mapTree->GetEvent(index);
372  if (e>m_cutoff) e = m_cutoff;
373  m_map[index] = e;
374  }
375 
376  Bool_t run = 0;
377  for (j=0; j<(Int_t)dim; j++) {
378  if (iter[j] < m_binning[j]-1) {
379  iter[j]++;
380  run = 1;
381  break;
382  } else {
383  iter[j] = 0;
384  }
385  }
386  if (!run) break;
387 
388  } while(1);
389 
390  file->Close();
391 }
392 
393 
395 
396 }
397 
398 void BinnedDensity::writeToFile(const char* filename) {
399  if (TString(filename).EndsWith(".root") ) {
400  writeToRootFile(filename);
401  } else {
402  writeToTextFile(filename);
403  }
404 }
405 
407 void BinnedDensity::writeToTextFile(const char* filename) {
408 
409  Logger::print(0,"%20.20s INFO: Writing binned density to text file \"%s\"\n", m_name, filename );
410 
411  FILE* file = fopen(filename, "w+");
412 
413  UInt_t dim = m_phaseSpace->dimensionality();
414  fprintf(file, "%d\n", dim);
415 
416  Int_t j;
417  for (j=0; j<(Int_t)dim; j++) {
418  fprintf(file, "%d\n", m_binning[j]);
419  }
420 
421  // Zero iterator vector
422  std::vector<Double_t> x(dim);
423  std::vector<UInt_t> iter(dim);
424  for (j=0; j<(Int_t)dim; j++) {
425  iter[j] = 0;
426  }
427 
428  UInt_t size = m_map.size();
429 
430  // Loop through the bins
431  do {
432 
433  UInt_t index = 0;
434  for (j=dim-1; j>=0; j--) {
435  Double_t low = m_phaseSpace->lowerLimit(j);
436  Double_t up = m_phaseSpace->upperLimit(j);
437  x[j] = low + (Double_t)iter[j]/((Double_t)m_binning[j]-1)*(up-low);
438  if (j==(Int_t)dim-1) {
439  index = iter[j];
440  } else {
441  index = index*m_binning[j] + iter[j];
442  }
443 // if (iter[0] == 0)
444 // Logger::print(0," Dim%d, bin%d, x=%f\n", j, iter[j], x[j]);
445  }
446 
447  if (index >= size) {
448  Logger::print(2,"%20.20s ERROR: index (%d) is larger than array size (%d)\n", m_name, index, size);
449  abort();
450  } else {
451  fprintf(file, "%f %d\n", m_map[index], m_phaseSpace->withinLimits(x) );
452  }
453 
454  Bool_t run = 0;
455  for (j=0; j<(Int_t)dim; j++) {
456  if (iter[j] < m_binning[j]-1) {
457  iter[j]++;
458  run = 1;
459  break;
460  } else {
461  iter[j] = 0;
462  }
463  }
464  if (!run) break;
465 
466  } while(1);
467  fclose(file);
468 }
469 
471 void BinnedDensity::writeToRootFile(const char* filename) {
472 
473  Logger::print(0,"%20.20s INFO: Writing binned density to ROOT file \"%s\"\n", m_name, filename );
474 
475  TFile file(filename, "RECREATE");
476  file.SetCompressionSettings(ROOT::CompressionSettings(ROOT::kLZMA, 9)); // Maximum compression
477  TTree dimTree("DimTree", "DimTree");
478 
479  Int_t bins;
480  dimTree.Branch("bins",&bins,"bins/I");
481  UInt_t dim = m_phaseSpace->dimensionality();
482 
483  Int_t j;
484  for (j=0; j<(Int_t)dim; j++) {
485  bins = m_binning[j];
486  dimTree.Fill();
487  }
488  dimTree.Write();
489 
490  // Zero iterator vector
491  std::vector<Double_t> x(dim);
492  std::vector<UInt_t> iter(dim);
493  for (j=0; j<(Int_t)dim; j++) {
494  iter[j] = 0;
495  }
496 
497  UInt_t size = m_map.size();
498 
499  TTree mapTree("MapTree", "MapTree");
500 
501  Char_t inphsp;
502  Float_t dens;
503  mapTree.Branch("dens", &dens,"dens/F");
504  mapTree.Branch("inphsp",&inphsp,"inphsp/B");
505 
506  // Loop through the bins
507  do {
508 
509  UInt_t index = 0;
510  for (j=dim-1; j>=0; j--) {
511  Double_t low = m_phaseSpace->lowerLimit(j);
512  Double_t up = m_phaseSpace->upperLimit(j);
513  x[j] = low + (Double_t)iter[j]/((Double_t)m_binning[j]-1)*(up-low);
514  if (j==(Int_t)dim-1) {
515  index = iter[j];
516  } else {
517  index = index*m_binning[j] + iter[j];
518  }
519 // if (iter[0] == 0)
520 // Logger::print(0," Dim%d, bin%d, x=%f\n", j, iter[j], x[j]);
521  }
522 
523  if (index >= size) {
524  Logger::print(2,"%20.20s ERROR: index (%d) is larger than array size (%d)\n", m_name, index, size);
525  abort();
526  } else {
527  dens = m_map[index];
528  inphsp = m_phaseSpace->withinLimits(x);
529  mapTree.Fill();
530  }
531 
532  Bool_t run = 0;
533  for (j=0; j<(Int_t)dim; j++) {
534  if (iter[j] < m_binning[j]-1) {
535  iter[j]++;
536  run = 1;
537  break;
538  } else {
539  iter[j] = 0;
540  }
541  }
542  if (!run) break;
543 
544  } while(1);
545 
546  mapTree.Write();
547  file.Close();
548 }
549 
550 Double_t BinnedDensity::density(std::vector<Double_t> &x) {
551 
552  UInt_t dim = m_phaseSpace->dimensionality();
553 
554  Int_t j;
555  std::vector<UInt_t> ivect(dim);
556 
557  for (j=0; j<(Int_t)dim; j++) {
558  Double_t low = m_phaseSpace->lowerLimit(j);
559  Double_t up = m_phaseSpace->upperLimit(j);
560  Double_t xj = x[j];
561  if (xj < low || xj > up) {
562  return 0.;
563  }
564  Int_t ij = (int)floor((xj-low)/(up-low)*(m_binning[j]-1));
565 
566  if (ij == (Int_t)m_binning[j]-1) ij--;
567 
568  ivect[j] = ij;
569  }
570 
571  Double_t e = 0.;
572  Double_t wsum = 0.;
573 
574  std::vector<UInt_t> iter(dim);
575 
576  for (j=0; j<(Int_t)dim; j++) {
577  iter[j] = 0;
578  }
579 
580  // Loop through the vertices of the N-dim cube
581  do {
582 
583  // Calculate weight
584  Double_t weight = 1;
585  for (j=0; j<(Int_t)dim; j++) {
586  Double_t low = m_phaseSpace->lowerLimit(j);
587  Double_t up = m_phaseSpace->upperLimit(j);
588 
589  Double_t xj1 = low + ((Double_t)ivect[j])/((Double_t)m_binning[j]-1.)*(up-low);
590  Double_t xj2 = low + ((Double_t)ivect[j]+1.)/((Double_t)m_binning[j]-1.)*(up-low);
591 
592  if (x[j] < xj1 || x[j] > xj2) {
593  Logger::print(1,"%20.20s WARNING: dim %d: x=%f, x1=%f, x2=%f\n", m_name, j, x[j], xj1, xj2);
594  }
595 
596  Double_t fweight;
597  if (iter[j] == 0) {
598  fweight = 1. - (x[j]-xj1)/(xj2-xj1);
599  } else {
600  fweight = (x[j]-xj1)/(xj2-xj1);
601  }
602  weight *= fweight;
603  if (fabs(fweight)>1.)
604  Logger::print(-1,"DEBUG: Weight fraction: dim%d, i=%d, x=%f, x1=%f, x2=%f, fweight=%f\n", j, iter[j], x[j], xj1, xj2, fweight);
605  }
606 
607  UInt_t index = 0;
608  for (j=dim-1; j>=0; j--) {
609  UInt_t ij = ivect[j] + iter[j];
610  if (j==(Int_t)dim-1) {
611  index = ij;
612  } else {
613  index = index*m_binning[j] + ij;
614  }
615  }
616 
617  e += weight*m_map[index];
618  wsum += weight;
619 
620  if (fabs(weight)>1.)
621  Logger::print(0,"DEBUG: Weight=%f, index=%d, density=%f\n", weight, index, m_map[index]);
622 
623  // Increment iterator
624  Bool_t run = 0;
625  for (j=0; j<(Int_t)dim; j++) {
626  if (iter[j] == 0) {
627  iter[j]++;
628  run = 1;
629  break;
630  } else {
631  iter[j] = 0;
632  }
633  }
634  if (!run) break;
635 
636  } while(1);
637 
638 // Logger::print(0,"density=%f, wsum=%f\n", e, wsum);
639 
640  return e;
641 
642 }
const char * name(void)
Return the name of the PDF.
Definition: AbsDensity.hh:106
std::vector< Double_t > m_map
Map of PDF values in bins.
virtual Double_t upperLimit(UInt_t var)=0
Return upper allowed limit of the variable.
Abstract class which defines probability density interface.
Definition: AbsDensity.hh:16
BinnedDensity(const char *pdfName, AbsPhaseSpace *thePhaseSpace, AbsDensity *d, UInt_t bins1, UInt_t bins2=0, UInt_t bins3=0, UInt_t bins4=0, UInt_t bins5=0)
Constructor that creates the binned density from any AbsDensity of the dimensionality up to five...
void readFromTextFile(const char *fileName)
Read the binned density from a text file. The dimensionality of the density stored in the file should...
void print(int level, const char *format,...)
Definition: Logger.cpp:27
char m_name[256]
PDF name.
Definition: AbsDensity.hh:118
Double_t m_cutoff
Cutoff value (values in bins greater than cutoff will be make equal to cutoff).
void writeToFile(const char *fileName)
Write the binned density into a file (ROOT or text format depending on the extension) ...
Abstract class which defines phase space interface.
virtual UInt_t dimensionality()=0
Get dimensionality of phase space.
void setTimer(void)
Reset time counter.
Definition: Logger.cpp:9
virtual Double_t lowerLimit(UInt_t var)=0
Return lower allowed limit of the variable.
int timer(int secs)
Return 1 if more than a certain number of seconds have passed since the last call of this function or...
Definition: Logger.cpp:13
AbsDensity * m_density
Reference to the input density.
void writeToRootFile(const char *fileName)
Write the binned density into a ROOT file.
void init(AbsPhaseSpace *thePhaseSpace, std::vector< UInt_t > &binning, AbsDensity *d)
Common initalisation function used by both constructors from an AbsDensity.
virtual ~BinnedDensity()
Destructor.
void readFromFile(const char *fileName)
Read the binned density from a file, ROOT or text depending on the extension. The dimensionality of t...
#define MAX_VECTOR_SIZE
virtual Bool_t withinLimits(std::vector< Double_t > &x)=0
Check if the point is within phase space limits.
virtual Double_t density(std::vector< Double_t > &x)=0
Calculate PDF value at the given point.
std::vector< UInt_t > m_binning
Vector of bin numbers for each variable.
void readFromRootFile(const char *fileName)
Read the binned density from a ROOT file. The dimensionality of the density stored in the file should...
AbsPhaseSpace * m_phaseSpace
Reference to the phase space definition.
void writeToTextFile(const char *fileName)
Write the binned density into a file.
Double_t density(std::vector< Double_t > &x)
Return the value of the PDF in a point.