meerkat is hosted by Hepforge, IPPP Durham
Meerkat  v1r3
Multidimensional kernel density estimation package
KernelDensity.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <vector>
3 #include <stdlib.h>
4 
5 #include "TMath.h"
6 #include "TRandom3.h"
7 
8 #include "AbsPhaseSpace.hh"
9 #include "AbsDensity.hh"
10 #include "KernelDensity.hh"
11 
12 #include "Logger.hh"
13 
14 KernelDensity::KernelDensity(const char* pdfname,
15  AbsPhaseSpace* thephaseSpace,
16  UInt_t apprSize,
17  Double_t width1,
18  Double_t width2,
19  Double_t width3,
20  Double_t width4,
21  Double_t width5) : AbsDensity(pdfname) {
22  std::vector<Double_t> width_list;
23 
24  width_list.push_back( width1 );
25  if (width2 > 0) width_list.push_back( width2 );
26  if (width3 > 0) width_list.push_back( width3 );
27  if (width4 > 0) width_list.push_back( width4 );
28  if (width5 > 0) width_list.push_back( width5 );
29 
30  m_phaseSpace = thephaseSpace;
31  m_approxDensity = 0;
32  m_maxTries = 10000;
33 
34  if (width_list.size() != m_phaseSpace->dimensionality()) {
35  Logger::print(2,"%20.20s ERROR: Number of non-zero elements in width list (%d) does not match phase space dimensionality (%d)\n",
36  m_name, (UInt_t)width_list.size(), m_phaseSpace->dimensionality());
37  abort();
38  }
39 
40  setWidth(width_list);
41  generateApproximation(apprSize);
42 
43 }
44 
45 KernelDensity::KernelDensity(const char* pdfname,
46  AbsPhaseSpace* thephaseSpace,
47  UInt_t apprSize,
48  AbsDensity* approxDensity,
49  Double_t width1,
50  Double_t width2,
51  Double_t width3,
52  Double_t width4,
53  Double_t width5) : AbsDensity(pdfname) {
54  std::vector<Double_t> width_list;
55 
56  width_list.push_back( width1 );
57  if (width2 > 0) width_list.push_back( width2 );
58  if (width3 > 0) width_list.push_back( width3 );
59  if (width4 > 0) width_list.push_back( width4 );
60  if (width5 > 0) width_list.push_back( width5 );
61 
62  m_phaseSpace = thephaseSpace;
63  m_approxDensity = approxDensity;
64  m_maxTries = 10000;
65 
66  if (width_list.size() != m_phaseSpace->dimensionality()) {
67  Logger::print(2,"%20.20s ERROR: Number of non-zero elements in width list (%d) does not match phase space dimensionality (%d)\n",
68  m_name, (UInt_t)width_list.size(), m_phaseSpace->dimensionality());
69  abort();
70  }
71 
72  if (approxDensity && approxDensity->phaseSpace() != m_phaseSpace) {
73  Logger::print(2,"%20.20s ERROR: Phase space definitions for Kernel density and its approximation differ\n", m_name);
74  abort();
75  }
76 
77  setWidth(width_list);
78  generateApproximation(apprSize);
79 
80 }
81 
82 KernelDensity::KernelDensity(const char* pdfName,
83  AbsPhaseSpace* thePhaseSpace,
84  std::vector<Double_t> &width,
85  UInt_t apprSize,
86  AbsDensity* approxDensity) : AbsDensity(pdfName) {
87 
88  m_phaseSpace = thePhaseSpace;
89  m_approxDensity = approxDensity;
90  m_maxTries = 10000;
91 
92  if (width.size() != m_phaseSpace->dimensionality()) {
93  Logger::print(2,"%20.20s ERROR: Number of elements in width list (%d) does not match phase space dimensionality (%d)\n",
94  m_name, (UInt_t)width.size(), m_phaseSpace->dimensionality());
95  abort();
96  }
97 
98  if (approxDensity && approxDensity->phaseSpace() != m_phaseSpace) {
99  Logger::print(2,"%20.20s ERROR: Phase space definitions for Kernel density and its approximation differ\n", m_name);
100  abort();
101  }
102 
103  setWidth(width);
104  generateApproximation(apprSize);
105 }
106 
108 
109 }
110 
112 void KernelDensity::setWidth(std::vector<Double_t> &width) {
113 
114  UInt_t n = 0;
115  std::vector<Double_t>::const_iterator i;
116  for (i=width.begin(); i != width.end(); i++) {
117  n++;
118  if (*i <= 0.) {
119  Logger::print(2,"%20.20s ERROR: Kernel width for variable %d less or equal to zero\n", m_name, n);
120  abort();
121  }
122  }
123 
124  m_width = width;
125 
126 }
127 
129 Bool_t KernelDensity::generateApproximation(UInt_t apprSize) {
130 
131  UInt_t dimensionality = m_phaseSpace->dimensionality();
132 
133  UInt_t cells = numCells();
134 
135  m_apprVector.resize(cells);
136 
137  Int_t cell;
138  for (cell = 0; cell < (Int_t)cells; cell++)
139  m_apprVector[cell].reserve(apprSize/cells);
140 
141  std::vector<Double_t> point(dimensionality);
142 
143  Logger::print(0,"%20.20s INFO: Generating approximation sample for %d-dim distribution, %d points, %d cells\n",
144  m_name, dimensionality, apprSize, cells);
145 
146  UInt_t i;
147  for (i=0; i<apprSize; i++) {
148 
149  if (!m_approxDensity) {
150  UInt_t t;
151  Bool_t success = 0;
152  for (t = 0; t < m_maxTries; t++) {
153 
154  // Generate random point
155  UInt_t var;
156  for (var = 0; var < dimensionality; var++) {
157  Double_t lowerLimit = m_phaseSpace->lowerLimit(var);
158  Double_t upperLimit = m_phaseSpace->upperLimit(var);
159  point[var] = lowerLimit + m_rnd.Rndm()*(upperLimit-lowerLimit);
160  }
161 
162  Bool_t inPhsp = m_phaseSpace->withinLimits(point);
163  if (inPhsp) {
164  success = 1;
165  break;
166  }
167  }
168  if (!success) {
169  Logger::print(1,"%20.20s WARNING: failed to generate a point within phase space after %d tries\n", m_name, m_maxTries);
170  return 0;
171  }
172 
173  } else {
174  m_approxDensity->generate(point);
175  }
176 
177  cell = cellIndex(point);
178  if (cell < (Int_t)cells && cell >= 0)
179  m_apprVector[cell].push_back(point);
180  else if (cell > 0) {
181  Logger::print(2,"%20.20s ERROR: cell number %d exceeds vector size %d\n", m_name, cell, cells);
182  abort();
183  }
184 
185  if (i % 1000 == 0) Logger::print(0,"%20.20s INFO: %d%% done (%d/%d)\n", m_name, (100*i/apprSize), i, apprSize);
186 
187  }
188 
189  return 1;
190 }
191 
192 Bool_t KernelDensity::readTuple(TTree* tree, const char* var1, const char* var2,
193  const char* var3, const char* var4,
194  const char* var5, const char* var6, UInt_t maxEvents) {
195 
196  UInt_t dim = phaseSpace()->dimensionality();
197  std::vector<TString> varList(dim);
198 
199  if (dim>=1 && var1!=0) varList[0] = TString(var1);
200  if (dim>=2 && var2!=0) varList[1] = TString(var2);
201  if (dim>=3 && var3!=0) varList[2] = TString(var3);
202  if (dim>=4 && var4!=0) varList[3] = TString(var4);
203  if (dim>=5 && var5!=0) varList[4] = TString(var5);
204  if (dim>=6 && var6!=0) varList[5] = TString(var6);
205 
206  return readTuple(tree, varList, maxEvents);
207 
208 }
209 
210 Bool_t KernelDensity::readTuple(TTree* tree, const char* var1, const char* var2,
211  const char* var3, const char* var4,
212  const char* var5, UInt_t maxEvents) {
213 
214  UInt_t dim = phaseSpace()->dimensionality();
215  std::vector<TString> varList(dim);
216 
217  if (dim>=1 && var1!=0) varList[0] = TString(var1);
218  if (dim>=2 && var2!=0) varList[1] = TString(var2);
219  if (dim>=3 && var3!=0) varList[2] = TString(var3);
220  if (dim>=4 && var4!=0) varList[3] = TString(var4);
221  if (dim>=5 && var5!=0) varList[4] = TString(var5);
222 
223  return readTuple(tree, varList, maxEvents);
224 
225 }
226 
227 Bool_t KernelDensity::readTuple(TTree* tree, const char* var1, const char* var2,
228  const char* var3, const char* var4,
229  UInt_t maxEvents) {
230 
231  UInt_t dim = phaseSpace()->dimensionality();
232  std::vector<TString> varList(dim);
233 
234  if (dim>=1 && var1!=0) varList[0] = TString(var1);
235  if (dim>=2 && var2!=0) varList[1] = TString(var2);
236  if (dim>=3 && var3!=0) varList[2] = TString(var3);
237  if (dim>=4 && var4!=0) varList[3] = TString(var4);
238 
239  return readTuple(tree, varList, maxEvents);
240 
241 }
242 
243 Bool_t KernelDensity::readTuple(TTree* tree, const char* var1, const char* var2,
244  const char* var3, UInt_t maxEvents) {
245 
246  UInt_t dim = phaseSpace()->dimensionality();
247  std::vector<TString> varList(dim);
248 
249  if (dim>=1 && var1!=0) varList[0] = TString(var1);
250  if (dim>=2 && var2!=0) varList[1] = TString(var2);
251  if (dim>=3 && var3!=0) varList[2] = TString(var3);
252 
253  return readTuple(tree, varList, maxEvents);
254 
255 }
256 
257 Bool_t KernelDensity::readTuple(TTree* tree, const char* var1, const char* var2,
258  UInt_t maxEvents) {
259 
260  UInt_t dim = phaseSpace()->dimensionality();
261  std::vector<TString> varList(dim);
262 
263  if (dim>=1 && var1!=0) varList[0] = TString(var1);
264  if (dim>=2 && var2!=0) varList[1] = TString(var2);
265 
266  return readTuple(tree, varList, maxEvents);
267 
268 }
269 
270 Bool_t KernelDensity::readTuple(TTree* tree, const char* var1, UInt_t maxEvents) {
271 
272  UInt_t dim = phaseSpace()->dimensionality();
273  std::vector<TString> varList(dim);
274 
275  if (dim>=1 && var1!=0) varList[0] = TString(var1);
276 
277  return readTuple(tree, varList, maxEvents);
278 
279 }
280 
282  UInt_t dim = phaseSpace()->dimensionality();
283 
284  UInt_t i;
285  UInt_t cells = 1;
286  for (i = 0; i<dim; i++) {
287  Double_t lower = phaseSpace()->lowerLimit(i);
288  Double_t upper = phaseSpace()->upperLimit(i);
289 
290  Int_t dimCells = TMath::Ceil( (upper-lower)/m_width[i] );
291 
292  cells *= dimCells;
293  }
294 
295  return cells;
296 }
297 
298 Int_t KernelDensity::cellIndex(std::vector<Double_t>& x) {
299 
300  UInt_t dim = phaseSpace()->dimensionality();
301 
302  UInt_t i;
303  UInt_t cell = 0;
304  for (i = 0; i<dim; i++) {
305  Double_t lower = phaseSpace()->lowerLimit(i);
306  Double_t upper = phaseSpace()->upperLimit(i);
307 
308  Double_t xi = x[i];
309  if (xi < lower || xi > upper) {
310  return -1;
311  }
312 
313  if (i<dim-1) {
314  Int_t dimCells = TMath::Ceil( (upper-lower)/m_width[i+1] );
315  cell *= dimCells;
316  }
317 
318  Int_t dimCells = TMath::Ceil( (upper-lower)/m_width[i] );
319  Int_t dimCell = TMath::Floor( (xi-lower)/m_width[i] );
320  if (dimCell == dimCells) dimCell--;
321 
322  if (dimCell < 0 || dimCell >= dimCells) {
323  Logger::print(2,"%20.20s ERROR: Something wrong! upper=%f, lower=%f, x=%f, width=%f, cell=%d, ncells=%d\n", m_name,
324  upper, lower, xi, m_width[i], dimCell, dimCells);
325  abort();
326  return -1;
327  }
328 // if (dimCell == 20 && dim==1)
329 // Logger::print(0,"DEBUG: CellIndex = %d, lower=%f, upper=%f, x=%f, width=%f\n", dimCell, lower, upper, xi, m_width[i]);
330 
331  cell += dimCell;
332  }
333 
334  return cell;
335 
336 }
337 
338 Bool_t KernelDensity::readTuple(TTree* tree, std::vector<TString> &vars, UInt_t maxEvents) {
339 
340  if (vars.size() != m_phaseSpace->dimensionality() ) {
341  Logger::print(2,"%20.20s ERROR: Number of TTree variables (%d) in tree \"%s\" does not correspond to phase space dimensionality (%d)\n",
342  m_name, (UInt_t)vars.size(), tree->GetName(), m_phaseSpace->dimensionality() );
343  abort();
344  }
345 
346  UInt_t nvars = vars.size();
347 
348  tree->ResetBranchAddresses();
349 
350  Long64_t nentries = tree->GetEntries();
351 
352  if (maxEvents > 0 && maxEvents < nentries) nentries = maxEvents;
353 
354  Long64_t i;
355 
356  std::vector<Float_t> varArray(nvars);
357 
358  UInt_t n;
359  for (n=0; n < nvars; n++) {
360  Logger::print(0,"%20.20s INFO: Will read branch \"%s\" from tree \"%s\"\n", m_name, vars[n].Data(), tree->GetName());
361  Int_t status = tree->SetBranchAddress(vars[n], &( varArray[n] ));
362  if (status < 0) {
363  Logger::print(2,"%20.20s ERROR: Error setting branch, status=%d\n", m_name, status);
364  abort();
365  }
366  }
367 
368  std::vector<Double_t> point(nvars);
369 
370  UInt_t cells = numCells();
371 
372  m_dataVector.resize(cells);
373 
374  Int_t cell;
375  for (cell = 0; cell < (Int_t)cells; cell++) {
376  m_dataVector[cell].reserve(nentries/cells);
377  }
378 
379  UInt_t nout = 0;
380  for(i=0; i<nentries; i++) {
381 // Logger::print(0,"DEBUG: before: %f\n", varArray[0]);
382  tree->GetEntry(i);
383 // Logger::print(0,"DEBUG: after: %f\n", varArray[0]);
384 
385  for (n=0; n<nvars; n++) {
386  point[n] = varArray[n];
387  }
388 
389  if (!m_phaseSpace->withinLimits( point )) {
390  nout ++;
391  Logger::print(1,"%20.20s WARNING: Ntuple point (", m_name);
392  for (n=0; n<nvars; n++) {
393  Logger::print(1,"%f ", point[n]);
394  }
395  Logger::print(1,", %f%%) outside phase space\n", 100.*float(nout)/float(i));
396  } else {
397  cell = cellIndex( point );
398  if (cell>=0) m_dataVector[cell].push_back(point);
399  }
400 
401  if (i % 10000 == 0) {
402  Logger::print(0,"%20.20s INFO: Read %lld/%lld events (%f%%)\n", m_name, i, nentries, 100.*float(i)/float(nentries));
403  }
404  }
405 
406  Logger::print(0,"%20.20s INFO: %lld events read in from \"%s\"\n", m_name, nentries-nout, tree->GetName() );
407 
408  return 1;
409 }
410 
411 Double_t KernelDensity::rawDensity(std::vector<Double_t> &x, std::vector<TCell> &vector) {
412 
413 // UInt_t cells = numCells();
414 
415  UInt_t dim = m_phaseSpace->dimensionality();
416 
417  std::vector<UInt_t> iter(dim);
418 
419  UInt_t j;
420  for (j=0; j<dim; j++) {
421  iter[j] = -1;
422  }
423 
424  Double_t d = 0.;
425 
426  // Loop through the vertices of the N-dim 3^N cube
427  do {
428 
429 // Logger::print(0,"DEBUG: Iterator ");
430 // for (j=0; j<dim; j++) {
431 // Logger::print(0," %d", iter[j]);
432 // }
433 // Logger::print(0,"\n");
434 
435  std::vector<Double_t> point(dim);
436  for (j=0; j<dim; j++) {
437  point[j] = x[j] + iter[j]*m_width[j];
438  }
439 
440  Int_t cell = cellIndex(point);
441 
443 
444 // Logger::print(-1,"DEBUG: Cell %d/%d\n", cell, cells);
445 
446  if (cell >= 0) {
447 
448  UInt_t size = vector[cell].size();
449 
450  for (UInt_t i=0; i<size; i++) {
451  Double_t sqsum = 0.;
452  for (UInt_t var=0; var < dim; var++) {
453  Double_t dx = (vector[cell][i][var] - x[var])/m_width[var];
454  if (TMath::Abs(dx) < 1.) {
455  sqsum += dx*dx;
456  } else {
457  sqsum = 1.;
458  break;
459  }
460  }
461  if (sqsum < 1.) d += (1.-sqsum);
462  }
463 
464  } // if cell >= 0
465 
466  // Increment iterator
467  Bool_t run = 0;
468  for (j=0; j<dim; j++) {
469  if (iter[j] <= 0) {
470  iter[j]++;
471  run = 1;
472  break;
473  } else {
474  iter[j] = -1;
475  }
476  }
477  if (!run) break;
478 
479  } while(1);
480 
481 
482  return d;
483 }
484 
485 Double_t KernelDensity::density(std::vector<Double_t> &x) {
486 
487  Double_t rawData = rawDensity(x, m_dataVector);
488  Double_t rawNorm = rawDensity(x, m_apprVector);
489 
490  if (rawNorm <= 0) {
491 // Logger::print(1,"WARNING: Normalisation density <= 0!\n");
492  return 0.;
493  }
494 
495  Double_t corrEff = rawData/rawNorm;
496  if (m_approxDensity) {
497  corrEff *= m_approxDensity->density(x);
498  }
499 
500 // Logger::print(-1,"DEBUG: %f %f %f %f\n", x[0], rawData, rawNorm, corrEff);
501 
502  return corrEff;
503 }
TPhspVector m_width
AbsPhaseSpace * m_phaseSpace
std::vector< TCell > m_dataVector
AbsDensity * m_approxDensity
virtual Double_t upperLimit(UInt_t var)=0
Return upper allowed limit of the variable.
KernelDensity(const char *pdfName, AbsPhaseSpace *thePhaseSpace, std::vector< Double_t > &width, UInt_t approxSize, AbsDensity *approxDensity=0)
Abstract class which defines probability density interface.
Definition: AbsDensity.hh:16
virtual AbsPhaseSpace * phaseSpace()=0
Return phase space definition for this PDF.
void print(int level, const char *format,...)
Definition: Logger.cpp:27
char m_name[256]
PDF name.
Definition: AbsDensity.hh:118
virtual ~KernelDensity()
AbsPhaseSpace * phaseSpace()
Return phase space definition for this PDF.
Bool_t generateApproximation(UInt_t approxSize)
Create normalisation vector.
void setWidth(std::vector< Double_t > &width)
Set kernel width.
TRandom3 m_rnd
Random number generator.
Definition: AbsDensity.hh:127
Double_t density(std::vector< Double_t > &x)
Calculate PDF value at the given point.
Abstract class which defines phase space interface.
virtual UInt_t dimensionality()=0
Get dimensionality of phase space.
UInt_t m_maxTries
Maximum number of tries for accept-reject method.
Definition: AbsDensity.hh:124
Double_t rawDensity(std::vector< Double_t > &x, std::vector< TCell > &vector)
virtual Double_t lowerLimit(UInt_t var)=0
Return lower allowed limit of the variable.
Double_t generate(std::vector< Double_t > &x)
Generate a single point within the phase space according to the PDF using accept-reject method...
Definition: AbsDensity.cpp:99
Bool_t readTuple(TTree *tree, std::vector< TString > &vars, UInt_t maxEvents=0)
virtual Bool_t withinLimits(std::vector< Double_t > &x)=0
Check if the point is within phase space limits.
virtual Double_t density(std::vector< Double_t > &x)=0
Calculate PDF value at the given point.
UInt_t numCells(void)
std::vector< TCell > m_apprVector
Int_t cellIndex(std::vector< Double_t > &x)