hpstr
The Heavy Photon Search Toolkit for Reconstruction (hpstr) provides an interface to physics data from the HPS experiment saved in the LCIO format and converts it into an ROOT based format.
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
BumpHunter.cxx
Go to the documentation of this file.
1
12#include "BumpHunter.h"
13
14BumpHunter::BumpHunter(FitFunction::BkgModel model, int poly_order, int toy_poly_order, int res_factor, double res_scale, bool asymptotic_limit)
15 : ofs(nullptr),
16 res_factor_(res_factor),
17 poly_order_(poly_order),
18 toy_poly_order_(toy_poly_order),
19 asymptotic_limit_(asymptotic_limit),
20 res_scale_(res_scale),
21 bkg_model_(model) { }
22
24
25void BumpHunter::initialize(TH1* histogram, double &mass_hypothesis) {
26 mass_hypothesis_ = mass_hypothesis;
27
28 //Set Minuit Minimizer options
29 ROOT::Math::MinimizerOptions::SetDefaultStrategy(2);
30
31 // Shift the mass hypothesis so it sits in the middle of a bin.
32 std::cout << "[ BumpHunter ]: Shifting mass hypothesis to nearest bin center "
33 << mass_hypothesis << " ---> ";
34 int mbin = histogram->GetXaxis()->FindBin(mass_hypothesis);
35 bin_width_ = histogram->GetBinWidth(2);
36 std::cout << "[ BumpHunter ]: Histogram Bin Width is " << bin_width_ << std::endl;
37 mass_hypothesis = histogram->GetXaxis()->GetBinCenter(mbin);
38 std::cout << mass_hypothesis << " GeV" << std::endl;
39
40 // If the mass hypothesis is below the lower bound, throw an exception. A
41 // search cannot be performed using an invalid value for the mass hypothesis.
42 if(mass_hypothesis < lower_bound_) {
43 throw std::runtime_error("Mass hypothesis less than the lower bound!");
44 }
45
46 // Correct the mass to take into account the mass scale systematic
47 //corr_mass_ = correctMass(mass_hypothesis);
48 //corr_mass_ = mass_hypothesis;
49
50 // Get the mass resolution at the corrected mass
52 std::cout << "[ BumpHunter ]: Mass resolution: " << mass_resolution_ << " GeV" << std::endl;
53
54 // Calculate the fit window size
55 window_size_ = 0;
58 this->printDebug("Window size: " + std::to_string(window_size_));
59
60 // Find the starting position of the window. This is set to the low edge of
61 // the bin closest to the calculated value. If the start position falls
62 // below the lower bound of the histogram, set it to the lower bound.
63 window_start_ = mass_hypothesis - window_size_/2;
64 int window_start_bin = histogram->GetXaxis()->FindBin(window_start_);
65 window_start_ = histogram->GetXaxis()->GetBinLowEdge(window_start_bin);
67 std::cout << "[ BumpHunter ]: Starting edge of window (" << window_start_
68 << " MeV) is below lower bound." << std::endl;
69 window_start_bin = histogram->GetXaxis()->FindBin(lower_bound_);
70 window_start_ = histogram->GetXaxis()->GetBinLowEdge(window_start_bin);
71 }
72
73 // Find the end position of the window. This is set to the lower edge of
74 // the bin closest to the calculated value. If the window edge falls above
75 // the upper bound of the histogram, set it to the upper bound.
76 // Furthermore, check that the bin serving as the upper boundary contains
77 // events. If the upper bound is shifted, reset the lower window bound.
78 window_end_ = mass_hypothesis + window_size_/2;
79 int window_end_bin = histogram->GetXaxis()->FindBin(window_end_);
80 window_end_ = histogram->GetXaxis()->GetBinUpEdge(window_end_bin);
82 std::cout << "[ BumpHunter ]: Upper edge of window (" << window_end_
83 << " GeV) is above upper bound." << std::endl;
84 window_end_bin = histogram->GetXaxis()->FindBin(upper_bound_);
85
86 int last_bin_above = histogram->FindLastBinAbove();
87 if(window_end_bin > last_bin_above) window_end_bin = last_bin_above;
88
89 window_end_ = histogram->GetXaxis()->GetBinUpEdge(window_end_bin);
90 window_start_bin = histogram->GetXaxis()->FindBin(window_end_ - window_size_);
91 window_start_ = histogram->GetXaxis()->GetBinLowEdge(window_start_bin);
92 }
94 bins_ = window_end_bin - window_start_bin + 1;
95 std::cout << "[ BumpHunter ]: Setting starting edge of fit window to "
96 << window_start_ << " GeV. Bin " << window_start_bin << std::endl;
97 std::cout << "[ BumpHunter ]: Setting upper edge of fit window to "
98 << window_end_ << " GeV. Bin " << window_end_bin << std::endl;
99 std::cout << "[ BumpHunter ]: Total Number of Bins: " << bins_ << std::endl;
100
101 // Estimate the background normalization within the window by integrating
102 // the histogram within the window range. This should be close to the
103 // background yield in the case where there is no signal present.
104 integral_ = histogram->Integral(window_start_bin, window_end_bin);
105 std::cout << "[ BumpHunter ]: Integral within window: " << integral_ << std::endl;
106}
107
108HpsFitResult* BumpHunter::performSearch(TH1* histogram, double mass_hypothesis, bool skip_bkg_fit, bool skip_ul) {
109 // Calculate all of the fit parameters e.g. window size, mass hypothesis
110 initialize(histogram, mass_hypothesis);
111 double initNorm = log10(integral_);
112
113 // Instantiate a new fit result object to store all of the results.
114 HpsFitResult* fit_result = new HpsFitResult();
115 fit_result->setPolyOrder(poly_order_);
116 fit_result->setBkgModelType(bkg_model_);
117
118 // Determine whether to use an exponential polynomial or normal polynomial.
121 TF1* bkg{nullptr};
122 TF1* bkg_toys{nullptr};
123
124 std::cout << "Defining fit functions." << std::endl;
125 std::cout << " Model :: ";
126 if(bkg_model_ == FitFunction::BkgModel::CHEBYSHEV) { std::cout << "Chebyshev Polynomial" << std::endl; }
127 else if(bkg_model_ == FitFunction::BkgModel::EXP_CHEBYSHEV) { std::cout << "Exponential Chebyshev Polynomial" << std::endl; }
128 else if(bkg_model_ == FitFunction::BkgModel::LEGENDRE) { std::cout << "Legendre Polynomial" << std::endl; }
129 else if(bkg_model_ == FitFunction::BkgModel::EXP_LEGENDRE) { std::cout << "Exponential Legendre Polynomial" << std::endl; }
130
131 std::cout << " Background Order :: ";
132 FitFunction::ModelOrder bkg_order_model;
133 if(poly_order_ == 1) {
134 std::cout << "1" << std::endl;
135 bkg_order_model = FitFunction::ModelOrder::FIRST;
136 } else if(poly_order_ == 3) {
137 std::cout << "3" << std::endl;
138 bkg_order_model = FitFunction::ModelOrder::THIRD;
139 } else if(poly_order_ == 5) {
140 std::cout << "5" << std::endl;
141 bkg_order_model = FitFunction::ModelOrder::FIFTH;
142 }
143
144 std::cout << " Toy Generator Order :: ";
145 FitFunction::ModelOrder toy_order_model;
146 if(toy_poly_order_ == 1) {
147 std::cout << "1" << std::endl;
148 toy_order_model = FitFunction::ModelOrder::FIRST;
149 } else if(toy_poly_order_ == 3) {
150 std::cout << "3" << std::endl;
151 toy_order_model = FitFunction::ModelOrder::THIRD;
152 } else if(toy_poly_order_ == 5) {
153 std::cout << "5" << std::endl;
154 toy_order_model = FitFunction::ModelOrder::FIFTH;
155 } else if(toy_poly_order_ == 7) {
156 std::cout << "7" << std::endl;
157 toy_order_model = FitFunction::ModelOrder::SEVENTH;
158 }
159
160 // If not fitting toys, start by performing a background only fit.
161 if(!skip_bkg_fit) {
162 // Start by performing a background only fit. The results from this fit
163 // are used to get an intial estimate of the background parameters.
164 std::cout << "*************************************************" << std::endl;
165 std::cout << "*************************************************" << std::endl;
166 std::cout << "[ BumpHunter ]: Performing background only fits." << std::endl;
167 std::cout << "*************************************************" << std::endl;
168 std::cout << "*************************************************" << std::endl;
169
170 // Define the background-only fit model.
171 if(isChebyshev) {
172 ChebyshevFitFunction bkg_func(mass_hypothesis, window_end_ - window_start_, bin_width_, bkg_order_model, FitFunction::SignalFitModel::NONE, isExp);
173 bkg = new TF1("bkg", bkg_func, -1, 1, poly_order_ + 1);
174 } else {
175 LegendreFitFunction bkg_func(mass_hypothesis, window_end_ - window_start_, bin_width_, bkg_order_model, FitFunction::SignalFitModel::NONE, isExp);
176 bkg = new TF1("bkg", bkg_func, -1, 1, poly_order_ + 1);
177 }
178 bkg->SetParameter(0, initNorm);
179 bkg->SetParName(0, "pol0");
180 for(int i = 1; i < poly_order_ + 1; i++) {
181 bkg->SetParameter(i, 0);
182 bkg->SetParName(i, Form("pol%i", i));
183 }
184
185 // Define the toy generator fit model.
186 if(isChebyshev) {
187 ChebyshevFitFunction bkg_toy_func(mass_hypothesis, window_end_ - window_start_, bin_width_, toy_order_model, FitFunction::SignalFitModel::NONE, isExp);
188 bkg_toys = new TF1("bkg_toys", bkg_toy_func, -1, 1, toy_poly_order_ + 1);
189 } else {
190 LegendreFitFunction bkg_toy_func(mass_hypothesis, window_end_ - window_start_, bin_width_, toy_order_model, FitFunction::SignalFitModel::NONE, isExp);
191 bkg_toys = new TF1("bkg_toys", bkg_toy_func, -1, 1, toy_poly_order_ + 1);
192 }
193 bkg_toys->SetParameter(0, initNorm);
194 bkg_toys->SetParName(0, "pol0");
195 for(int i = 1; i < toy_poly_order_ + 1; i++) {
196 bkg_toys->SetParameter(i, 0);
197 bkg_toys->SetParName(i, Form("pol%i", i));
198 }
199
200 // Perform the background-only fit and store the result.
201 TFitResultPtr result = histogram->Fit("bkg", "QLES+", "", window_start_, window_end_);
202 fit_result->setBkgFitResult(result);
203
204 std::cout << "*************************************************" << std::endl;
205 std::cout << "*************************************************" << std::endl;
206 std::cout << "[ BumpHunter ]: Performing background Toy fit." << std::endl;
207 std::cout << "*************************************************" << std::endl;
208 std::cout << "*************************************************" << std::endl;
209
210 // Perform the toy model fit and store the result.
211 TFitResultPtr result_toys = histogram->Fit("bkg_toys", "QLES+", "", window_start_, window_end_);
212 fit_result->setBkgToysFitResult(result_toys);
213 }
214
215 std::cout << "***************************************************" << std::endl;
216 std::cout << "***************************************************" << std::endl;
217 std::cout << "[ BumpHunter ]: Performing a signal+background fit." << std::endl;
218 std::cout << "***************************************************" << std::endl;
219 std::cout << "***************************************************" << std::endl;
220
221 TF1* full{nullptr};
222
223 // Define the background+signal fit model.
224 if(isChebyshev) {
225 ChebyshevFitFunction full_func(mass_hypothesis, window_end_ - window_start_, bin_width_, bkg_order_model, FitFunction::SignalFitModel::GAUSSIAN, isExp);
226 full = new TF1("full", full_func, -1, 1, poly_order_ + 4);
227 } else {
228 LegendreFitFunction full_func(mass_hypothesis, window_end_ - window_start_, bin_width_, bkg_order_model, FitFunction::SignalFitModel::GAUSSIAN, isExp);
229 full = new TF1("full", full_func, -1, 1, poly_order_ + 4);
230 }
231 full->SetParameter(0, initNorm);
232 full->SetParName(0, "pol0");
233 full->SetParameter(poly_order_ + 1, 0.0);
234 full->SetParName(poly_order_ + 1, "signal norm");
235 full->SetParName(poly_order_ + 2, "mean");
236 full->SetParName(poly_order_ + 3, "sigma");
237 for(int i = 1; i < poly_order_ + 1; i++) {
238 full->SetParameter(i, 0);
239 full->SetParName(i, Form("pol%i", i));
240 }
241 full->FixParameter(poly_order_ + 2, mass_hypothesis);
242 full->FixParameter(poly_order_ + 3, mass_resolution_);
243
244 for(int parI = 0; parI < poly_order_ + 1; parI++) {
245 full->SetParameter(parI, bkg->GetParameter(parI));
246 }
247 TFitResultPtr full_result = histogram->Fit("full", "QLES+", "", window_start_, window_end_);
248 fit_result->setCompFitResult(full_result);
249
250 calculatePValue(fit_result);
251 std::cout << "[ BumpHunter ]: Bkg Fit Status: " << fit_result->getBkgFitResult()->IsValid() << std::endl;
252 std::cout << "[ BumpHunter ]: Bkg Toys Fit Status: " << fit_result->getBkgToysFitResult()->IsValid() << std::endl;
253 std::cout << "[ BumpHunter ]: Full Fit Status: " << fit_result->getCompFitResult()->IsValid() << std::endl;
254 if((!skip_ul) && full_result->IsValid()) { getUpperLimit(histogram, fit_result); }
255
256 // Persist the mass hypothesis used for this fit
257 fit_result->setMass(mass_hypothesis_);
258
259 // Persist the mass after correcting for the mass scale
260 fit_result->setCorrectedMass(corr_mass_);
261
262 // Set the window size
263 fit_result->setWindowSize(window_size_);
264
265 // Set the total number of events within the window
266 fit_result->setIntegral(integral_);
267
268 return fit_result;
269}
270
272 std::cout << "[ BumpHunter ]: Calculating p-value: " << std::endl;
273
274 double signal_yield = result->getCompFitResult()->Parameter(poly_order_ + 1);
275 printDebug("Signal yield: " + std::to_string(signal_yield));
276
277 // In searching for a resonance, a signal is expected to lead to an
278 // excess of events. In this case, a signal yield of < 0 is
279 // meaningless so we set the p-value = 1. This follows the formulation
280 // put forth by Cowen et al. in https://arxiv.org/pdf/1007.1727.pdf.
281 //if(signal_yield <= 0) {
282 // result->setPValue(1);
283 // printDebug("Signal yield is negative ... setting p-value = 1");
284 // return;
285 //}
286
287 // Get the NLL obtained by minimizing the composite model with the signal
288 // yield floating.
289 double mle_nll = result->getCompFitResult()->MinFcnValue();
290 printDebug("NLL when mu = " + std::to_string(signal_yield) + ": " + std::to_string(mle_nll));
291
292 // Get the NLL obtained from the Bkg only fit.
293 double bkg_nll = result->getBkgFitResult()->MinFcnValue();
294 printDebug("NLL when mu = 0: " + std::to_string(bkg_nll));
295
296 // 1) Calculate the likelihood ratio which is chi2 distributed.
297 // 2) From the chi2, calculate the p-value.
298 double q0 = 0;
299 double p_value = 0;
300 getChi2Prob(bkg_nll, mle_nll, q0, p_value);
301
302 // Calculate r0 stat
303 double r0 = -2.0*(mle_nll - bkg_nll);
304 if(signal_yield < 0.0 ) r0 = 2.0*(mle_nll - bkg_nll);
305 printDebug("r0: " + std::to_string(r0));
306 printDebug("Z0: " + std::to_string(r0*TMath::Sqrt(fabs(r0))/fabs(r0)));
307 p_value = 1 - ROOT::Math::gaussian_cdf( r0*TMath::Sqrt(fabs(r0))/fabs(r0) );
308
309 std::cout << "[ BumpHunter ]: p-value: " << p_value << std::endl;
310
311 // Update the result
312 result->setPValue(p_value);
313 result->setQ0(q0);
314}
315
316void BumpHunter::printDebug(std::string message) {
317 std::cout << "[ BumpHunter ]: " << message << std::endl;
318}
319
320void BumpHunter::getUpperLimit(TH1* histogram, HpsFitResult* result) {
322 BumpHunter::getUpperLimitAsymCLs(histogram, result);
323 } else {
324 BumpHunter::getUpperLimitPower(histogram, result);
325 }
326}
327
329 std::cout << "[ BumpHunter ]: Calculating upper limit." << std::endl;
330
331 // Get the signal yield and signal yield error.
332 double signal_yield = result->getCompFitResult()->Parameter(poly_order_ + 1);
333 if(signal_yield < 0) { signal_yield = 0; }
334 double signal_yield_error = result->getCompFitResult()->ParError(poly_order_ + 1);
335
336 // Debug print the signal yield and its error.
337 std::cout << "Signal Yield :: " << signal_yield << std::endl;
338 std::cout << "Signal Yield Error :: " << signal_yield_error << std::endl;
339
340 // Calculate the upper limit according to Equation (69) of "Asymptotic formulae
341 // for likelihood-based tests of new physics" by Cowan et alii. 1.64 is derived
342 // from Equation (1) for a 95% confidence level (alpha = 0.05).
343 double upper_limit = signal_yield + 1.64 * signal_yield_error;
344
345 // The p-value is 0.05 by definition.
346 double p_value = 0.05;
347
348 // Debug print the upper limit.
349 std::cout << "Upper Limit :: " << upper_limit << std::endl;
350 std::cout << "p-Value :: " << p_value << std::endl;
351
352 // Set the upper limit and upper limit p-value.
353 result->setUpperLimit(upper_limit);
354 result->setUpperLimitPValue(p_value);
355}
356
357void BumpHunter::getUpperLimitAsymCLs(TH1* histogram, HpsFitResult* result) {
358 // Determine whether to use an exponential polynomial or normal polynomial.
361 double initNorm = log10(integral_);
362
363 // Instantiate a fit function for the appropriate polynomial order.
364 TF1* comp{nullptr};
365 FitFunction::ModelOrder bkg_order_model;
366 if(poly_order_ == 1) { bkg_order_model = FitFunction::ModelOrder::FIRST; }
367 else if(poly_order_ == 3) { bkg_order_model = FitFunction::ModelOrder::THIRD; }
368 else if(poly_order_ == 5) { bkg_order_model = FitFunction::ModelOrder::FIFTH; }
369 if(isChebyshev) {
371 comp = new TF1("comp_ul", comp_func, -1, 1, poly_order_ + 4);
372 } else {
374 comp = new TF1("comp_ul", comp_func, -1, 1, poly_order_ + 4);
375 }
376 comp->SetParameter(0, initNorm);
377 comp->SetParName(0, "pol0");
378 comp->SetParameter(poly_order_ + 1, 0.0);
379 comp->SetParName(poly_order_ + 1, "signal norm");
380 comp->SetParName(poly_order_ + 2, "mean");
381 comp->SetParName(poly_order_ + 3, "sigma");
382 for(int i = 1; i < poly_order_ + 1; i++) {
383 comp->SetParameter(i, 0);
384 comp->SetParName(i, Form("pol%i", i));
385 }
386 comp->FixParameter(poly_order_ + 2, mass_hypothesis_);
387 comp->FixParameter(poly_order_ + 3, mass_resolution_);
388 //for(int parI = 1; parI < poly_order_ + 1; parI++) {
389 // comp->FixParameter(parI, result->getCompFitResult()->Parameter(parI));
390 //}
391
392 std::cout << "Mass resolution: " << mass_resolution_ << std::endl;
393 std::cout << "[ BumpHunter ]: Calculating upper limit." << std::endl;
394
395 // Get the signal yield obtained from the signal+bkg fit
396 double mu_hat = result->getCompFitResult()->Parameter(poly_order_ + 1);
397 double sigma = result->getCompFitResult()->ParError(poly_order_ + 1);
398 printDebug("Signal yield: " + std::to_string(mu_hat));
399 printDebug("Sigma: " + std::to_string(sigma));
400
401 // Get the minimum NLL value that will be used for testing upper limits.
402 double mle_nll = result->getCompFitResult()->MinFcnValue();
403
404 // Get the NLL obtained assuming the background only hypothesis
405 double bkg_nll = result->getBkgFitResult()->MinFcnValue();
406
407 printDebug("MLE NLL: " + std::to_string(mle_nll));
408 printDebug("mu=0 NLL: " + std::to_string(bkg_nll));
409
410 double mu95up = fabs(mu_hat + 1.64*sigma); //This should give us something close to start
411
412 double CLs = 1.0;
413 double p_mu = 1;
414 double p_b = 1;
415 double r_mu = -9999999.9;
416 int tryN = 0;
417 while(true) {
418 tryN++;
419 comp->FixParameter(poly_order_ + 1, mu95up);
420
421 TFitResultPtr full_result = histogram->Fit("comp_ul", "NQLES", "", window_start_, window_end_);
422 double mu_nll = full_result->MinFcnValue();
423
424 double nllamb_mu = mu_nll - mle_nll;
425 if(mu_hat < 0.0) nllamb_mu = mu_nll - bkg_nll;
426
427 r_mu = 2.0*nllamb_mu;
428 if(mu_hat > mu95up) r_mu = -2.0*nllamb_mu;
429
430 if(r_mu < 0.0)
431 {
432 p_mu = 1.0 - ROOT::Math::gaussian_cdf( -1.0*TMath::Sqrt(-1.0*r_mu) );
433 p_b = 1.0 - ROOT::Math::gaussian_cdf( -1.0*TMath::Sqrt(-1.0*r_mu) - (mu95up/sigma) );
434 }
435 else if(r_mu > mu95up*mu95up/(sigma*sigma))
436 {
437 p_mu = 1.0 - ROOT::Math::gaussian_cdf( (r_mu + mu95up*mu95up/(sigma*sigma))/(2.0*mu95up/sigma) );
438 p_b = 1.0 - ROOT::Math::gaussian_cdf( (r_mu - mu95up*mu95up/(sigma*sigma))/(2.0*mu95up/sigma) );
439 }
440 else
441 {
442 p_mu = 1.0 - ROOT::Math::gaussian_cdf( TMath::Sqrt(r_mu) );
443 p_b = 1.0 - ROOT::Math::gaussian_cdf( TMath::Sqrt(r_mu) - (mu95up/sigma) );
444 }
445
446
447 CLs = p_mu/p_b;
448
449 std::cout << "[ BumpHunter ]: mu: " << mu95up << std::endl;
450 std::cout << "[ BumpHunter ]: mle_nll: " << mle_nll << std::endl;
451 std::cout << "[ BumpHunter ]: bkg_nll: " << bkg_nll << std::endl;
452 std::cout << "[ BumpHunter ]: mu_nll: " << mu_nll << std::endl;
453 std::cout << "[ BumpHunter ]: nllamb_mu: " << nllamb_mu << std::endl;
454 std::cout << "[ BumpHunter ]: r_mu: " << r_mu << std::endl;
455 std::cout << "[ BumpHunter ]: p_mu: " << p_mu << std::endl;
456 std::cout << "[ BumpHunter ]: p_b: " << p_b << std::endl;
457 std::cout << "[ BumpHunter ]: CLs: " << CLs << std::endl;
458 if(tryN > 1000)
459 {
460 std::cout << "[ BumpHunter ]: Upper limit aborting, too many tries" << std::endl;
461
462 result->setUpperLimit(1.64*sigma);
463 result->setUpperLimitPValue(-9.0);
464
465 break;
466 }
467 if((CLs <= 0.051 && CLs > 0.049))
468 {
469 std::cout << "[ BumpHunter ]: Upper limit: " << mu95up << std::endl;
470 std::cout << "[ BumpHunter ]: CLs: " << CLs << std::endl;
471
472 result->setUpperLimit(mu95up);
473 result->setUpperLimitPValue(CLs);
474
475 break;
476 }
477 else if(CLs <= 1e-10) { mu95up = mu95up*0.1; }
478 else if(CLs <= 1e-8) { mu95up = mu95up*0.5; }
479 else if(CLs <= 1e-4) { mu95up = mu95up*0.8; }
480 else if(CLs <= 0.01) { mu95up = mu95up*0.9; }
481 else if(CLs <= 0.04) { mu95up = mu95up*0.99; }
482 else if(CLs <= 0.049) { mu95up = mu95up*0.999; }
483 else if(CLs <= 0.1) { mu95up = mu95up*1.01; }
484 else { mu95up = mu95up*1.1; }
485 printDebug("Setting mu to: " + std::to_string(mu95up));
486 }
487}
488
489void BumpHunter::getUpperLimitPower(TH1* histogram, HpsFitResult* result) {
490 // Determine whether to use an exponential polynomial or normal polynomial.
493 double initNorm = log10(integral_);
494
495 // Instantiate a fit function for the appropriate polynomial order.
496 TF1* comp{nullptr};
497 FitFunction::ModelOrder bkg_order_model;
498 if(poly_order_ == 1) { bkg_order_model = FitFunction::ModelOrder::FIRST; }
499 else if(poly_order_ == 3) { bkg_order_model = FitFunction::ModelOrder::THIRD; }
500 else if(poly_order_ == 5) { bkg_order_model = FitFunction::ModelOrder::FIFTH; }
501 if(isChebyshev) {
503 comp = new TF1("comp_ul", comp_func, -1, 1, poly_order_ + 4);
504 } else {
506 comp = new TF1("comp_ul", comp_func, -1, 1, poly_order_ + 4);
507 }
508 comp->SetParameter(0, initNorm);
509 comp->SetParName(0, "pol0");
510 comp->SetParameter(poly_order_ + 1, 0.0);
511 comp->SetParName(poly_order_ + 1, "signal norm");
512 comp->SetParName(poly_order_ + 2, "mean");
513 comp->SetParName(poly_order_ + 3, "sigma");
514 for(int i = 1; i < poly_order_ + 1; i++) {
515 comp->SetParameter(i, 0);
516 comp->SetParName(i, Form("pol%i", i));
517 }
518 comp->FixParameter(poly_order_ + 2, mass_hypothesis_);
519 comp->FixParameter(poly_order_ + 3, mass_resolution_);
520
521 std::cout << "Mass resolution: " << mass_resolution_ << std::endl;
522 std::cout << "[ BumpHunter ]: Calculating upper limit." << std::endl;
523
524 // Get the signal yield obtained from the signal+bkg fit
525 double signal_yield = result->getCompFitResult()->Parameter(poly_order_ + 1);
526 printDebug("Signal yield: " + std::to_string(signal_yield));
527
528 // Get the minimum NLL value that will be used for testing upper limits.
529 // If the signal yield (mu estimator) at the min NLL is < 0, use the NLL
530 // obtained when mu = 0.
531 double mle_nll = result->getCompFitResult()->MinFcnValue();
532
533 if(signal_yield < 0) {
534 printDebug("Signal yield @ min NLL is < 0. Using NLL when signal yield = 0");
535
536 // Get the NLL obtained assuming the background only hypothesis
537 mle_nll = result->getBkgFitResult()->MinFcnValue();
538
539 signal_yield = 0;
540 }
541 printDebug("MLE NLL: " + std::to_string(mle_nll));
542
543 signal_yield = floor(signal_yield) + 1;
544
545 double p_value = 1;
546 double q0 = 0;
547 while(true) {
548 printDebug("Setting signal yield to: " + std::to_string(signal_yield));
549 //std::cout << "[ BumpHunter ]: Current p-value: " << p_value << std::endl;
550 comp->FixParameter(poly_order_ + 1, signal_yield);
551
552 TFitResultPtr full_result = histogram->Fit("comp_ul", "QLES+", "", window_start_, window_end_);
553 double cond_nll = full_result->MinFcnValue();
554
555 // 1) Calculate the likelihood ratio which is chi2 distributed.
556 // 2) From the chi2, calculate the p-value.
557 getChi2Prob(cond_nll, mle_nll, q0, p_value);
558
559 printDebug("p-value after fit : " + std::to_string(p_value));
560 std::cout << "[ BumpHunter ]: Current Signal Yield: " << signal_yield << std::endl;
561 std::cout << "[ BumpHunter ]: Current p-value: " << p_value << std::endl;
562
563 if((p_value <= 0.0455 && p_value > 0.044)) {
564 std::cout << "[ BumpHunter ]: Upper limit: " << signal_yield << std::endl;
565 std::cout << "[ BumpHunter ]: p-value: " << p_value << std::endl;
566
567 result->setUpperLimit(signal_yield);
568 result->setUpperLimitPValue(p_value);
569
570 break;
571 } else if (signal_yield <= 0 && p_value < 0.044) {
572 std::cout << "[ BumpHunter ]: Caution Background Model suspicious!" << std::endl;
573 std::cout << "[ BumpHunter ]: Upper limit: " << signal_yield << std::endl;
574 std::cout << "[ BumpHunter ]: p-value: " << p_value << std::endl;
575
576 result->setUpperLimit(signal_yield);
577 result->setUpperLimitPValue(p_value);
578
579 break;
580 }
581 else if(p_value <= 0.044) { signal_yield -= 1; }
582 else if(p_value <= 0.059) { signal_yield += 10; }
583 else if(p_value <= 0.10) { signal_yield += 20; }
584 else if(p_value <= 0.2) { signal_yield += 40; }
585 else { signal_yield += 100; }
586 }
587}
588
589std::vector<TH1*> BumpHunter::generateToys(TH1* histogram, double n_toys, int seed, int toy_sig_samples, int bkg_mult, TH1* signal_hist) {
590 gRandom->SetSeed(seed);
591
592 TF1* bkg_toys = histogram->GetFunction("bkg_toys");
593 TF1* sig_toys = new TF1("sig_toys", "gaus", window_start_, window_end_);
594 sig_toys->SetParameters(1.0, mass_hypothesis_, mass_resolution_);
595
596 std::vector<TH1*> hists;
597 int bkg_events = bkg_mult * int(integral_);
598 for(int itoy = 0; itoy < n_toys; ++itoy) {
599 std::string name = "invariant_mass_" + std::to_string(itoy);
600 if(itoy%100 == 0) {
601 std::cout << "Generating Toy " << itoy << std::endl;
602 }
603 TH1F* hist = new TH1F(name.c_str(), name.c_str(), bins_, window_start_, window_end_);
604 for(int i = 0; i < bkg_events; ++i) {
605 hist->Fill(bkg_toys->GetRandom(window_start_, window_end_));
606 }
607 for(int i = 0; i < toy_sig_samples; i++) {
608 double sig_sample = 0;
609 if(signal_hist != NULL) { sig_sample = signal_hist->GetRandom(); }
610 else { sig_sample = sig_toys->GetRandom(window_start_, window_end_); }
611 hist->Fill(sig_sample);
612 }
613 hists.push_back(hist);
614 }
615
616 return hists;
617}
618
619void BumpHunter::getChi2Prob(double cond_nll, double mle_nll, double &q0, double &p_value) {
620 //printDebug("Cond NLL: " + std::to_string(cond_nll));
621 //printDebug("Uncod NLL: " + std::to_string(mle_nll));
622 double diff = cond_nll - mle_nll;
623 //printDebug("Delta NLL: " + std::to_string(diff));
624
625 q0 = 2 * diff;
626 //printDebug("q0: " + std::to_string(q0));
627
628 p_value = ROOT::Math::chisquared_cdf_c(q0, 1);
629 //printDebug("p-value before dividing: " + std::to_string(p_value));
630 p_value *= 0.5;
631 //printDebug("p-value: " + std::to_string(p_value));
632}
633
634void BumpHunter::setBounds(double lower_bound, double upper_bound) {
635 lower_bound_ = lower_bound;
636 upper_bound_ = upper_bound;
637 printf("Fit bounds set to [ %f , %f ]\n", lower_bound_, upper_bound_);
638}
639
640double BumpHunter::correctMass(double mass) {
641 double offset = -1.19892320e4 * pow(mass, 3) + 1.50196798e3 * pow(mass, 2) - 8.38873712e1 * mass + 6.23215746;
642 offset /= 100;
643 printDebug("Offset: " + std::to_string(offset));
644 double cmass = mass - mass * offset;
645 printDebug("Corrected Mass: " + std::to_string(cmass));
646 return cmass;
647}
void getChi2Prob(double min_nll_null, double min_nll, double &q0, double &p_value)
description
double bin_width_
Definition BumpHunter.h:295
int toy_poly_order_
Definition BumpHunter.h:301
void printDebug(std::string message)
Print debug statement.
double correctMass(double mass)
description
double window_end_
Definition BumpHunter.h:323
double mass_hypothesis_
Definition BumpHunter.h:326
void calculatePValue(HpsFitResult *result)
description
double window_size_
Definition BumpHunter.h:289
double window_start_
Definition BumpHunter.h:320
void getUpperLimitPower(TH1 *histogram, HpsFitResult *result)
description
void getUpperLimitAsymptotic(TH1 *histogram, HpsFitResult *result)
description
void getUpperLimitAsymCLs(TH1 *histogram, HpsFitResult *result)
description
void initialize(TH1 *histogram, double &mass_hypothesis)
Given the mass of interest, setup the window parameters and initialize the fit parameters.
double getMassResolution(double mass)
Definition BumpHunter.h:202
int poly_order_
Definition BumpHunter.h:298
double integral_
Definition BumpHunter.h:271
double res_factor_
Definition BumpHunter.h:286
double lower_bound_
Definition BumpHunter.h:265
BumpHunter(FitFunction::BkgModel model, int poly_order, int toy_poly_order, int res_factor, double res_scale=1.00, bool asymptotic_limit=true)
Default Constructor.
bool window_use_res_scale_
Definition BumpHunter.h:332
double upper_bound_
Definition BumpHunter.h:268
FitFunction::BkgModel bkg_model_
Definition BumpHunter.h:274
void setBounds(double low_bound, double high_bound)
Set the histogram bounds.
std::vector< TH1 * > generateToys(TH1 *histogram, double n_toys, int seed, int toy_sig_samples, int bkg_mult=1, TH1 *signal_hist=nullptr)
description
void getUpperLimit(TH1 *histogram, HpsFitResult *result)
Get the signal upper limit.
bool asymptotic_limit_
Definition BumpHunter.h:280
HpsFitResult * performSearch(TH1 *histogram, double mass_hypothesis, bool skip_bkg_fit, bool skip_ul)
Perform a search for a resonance at the given mass hypothesis.
double mass_resolution_
Definition BumpHunter.h:329
double corr_mass_
Definition BumpHunter.h:262
ModelOrder
description
Definition FitFunction.h:28
BkgModel
description
Definition FitFunction.h:39
description
void setCompFitResult(TFitResultPtr comp_result)
Set the result from the signal+background only fit.
void setIntegral(double integral)
Set the integral.
TFitResultPtr getBkgFitResult()
Get background fit result.
void setBkgModelType(FitFunction::BkgModel bkg_model)
Sets the type of background fit function used by the fitter.
void setQ0(double q0)
Set Q0.
TFitResultPtr getCompFitResult()
Get the complete fit result.
void setPolyOrder(int poly_order)
Set the order polynomial used by the fitter.
void setCorrectedMass(double cmass)
Set the corrected mass.
void setPValue(double p_value)
Set the p-value.
void setMass(double mass)
Set mass hypothesis used for this fit.
void setBkgToysFitResult(TFitResultPtr bkg_toy_result)
Set the result from the background only fit for generating toys.
void setUpperLimit(double upper_limit)
Set the 2 sigma upper limit.
void setBkgFitResult(TFitResultPtr bkg_result)
Set the result from the background only fit.
void setUpperLimitPValue(double upper_limit_p_value)
Set the p-value at the end of the upper limit calculation.
TFitResultPtr getBkgToysFitResult()
Get background toy model fit result.
void setWindowSize(double window_size)
Set the size of the fit window used to get this results.