#include <assert.h>
#include <algorithm>
#include "TFile.h"
#include "TObjString.h"
#include "TMath.h"
#include "TMVA/MethodPDERS.h"
#include "TMVA/Tools.h"
#include "TMVA/RootFinder.h"
#define TMVA_MethodPDERS__countByHand__Debug__
#undef TMVA_MethodPDERS__countByHand__Debug__
// File-scope switch: when kTRUE the adaptive volume size is found with the
// numerical RootFinder; otherwise an iterative hand-stepped search is used
// (see RScalc).
namespace TMVA {
const Bool_t MethodPDERS_UseFindRoot = kTRUE;
}
// Static back-pointer to the current instance so that the free-function
// root-finder callback (IGetVolumeContentForRoot) can reach member state.
// Kept up to date via UpdateThis().
TMVA::MethodPDERS* TMVA::MethodPDERS::fgThisPDERS = NULL;
ClassImp(TMVA::MethodPDERS)
// Standard constructor used for training.
// PDE-RS: probability density estimator via range searching.
// Initializes defaults, then declares, parses and processes the user's
// configuration option string.
TMVA::MethodPDERS::MethodPDERS( const TString& jobName, const TString& methodTitle, DataSet& theData,
const TString& theOption, TDirectory* theTargetDir )
: MethodBase( jobName, methodTitle, theData, theOption, theTargetDir )
, fDelta(0)  // per-variable volume size; allocated in SetVolumeElement()
, fShift(0)  // per-variable relative position of the test event in the volume
{
InitPDERS();
// config-file section name for this method's options
SetConfigName( TString("Method") + GetMethodName() );
DeclareOptions();
ParseOptions();
ProcessOptions();
}
// Constructor used when instantiating the method from an existing weight
// file. Options are only declared here; the actual state is restored later
// in ReadWeightsFromStream(), so ParseOptions/ProcessOptions are not called.
TMVA::MethodPDERS::MethodPDERS( DataSet& theData,
const TString& theWeightFile,
TDirectory* theTargetDir )
: MethodBase( theData, theWeightFile, theTargetDir )
, fDelta(0)  // allocated in SetVolumeElement()
, fShift(0)  // allocated in SetVolumeElement()
{
InitPDERS();
DeclareOptions();
}
// Default initialization routine called by all constructors.
// Sets the method identity, clears the search trees and installs the
// default option values (which ProcessOptions may later override).
void TMVA::MethodPDERS::InitPDERS( void )
{
SetMethodName( "PDERS" );
SetMethodType( Types::kPDERS );
SetTestvarName();
// trees are created in Train() or ReadWeightsFromStream()
fBinaryTreeS = fBinaryTreeB = NULL;
// register this instance with the static root-finder back-pointer
UpdateThis();
// defaults
fDeltaFrac = 3.0;            // volume size in units of the average RMS
fVRangeMode = kAdaptive;     // adaptive volume sizing by default
fKernelEstimator = kBox;     // simple box (counting) kernel by default
fNEventsMin = 100;           // target event-count window for adaptive mode
fNEventsMax = 200;
fMaxVIterations = 150;       // cap on volume-adjustment iterations
fInitialScale = 0.99;
fGaussSigma = 0.1;           // Gaussian kernel width (relative to volume)
fNormTree = kFALSE;          // optional normalization of the search trees
fkNNTests = 1000;
// kNN mode reuses the adaptive event-count window as its k-range
fkNNMin = Int_t(fNEventsMin);
fkNNMax = Int_t(fNEventsMax);
fInitializedVolumeEle = kFALSE;  // volume element built lazily if needed
fAverageRMS.clear();
SetSignalReferenceCut( 0.0 );
}
TMVA::MethodPDERS::~MethodPDERS( void )
{
   // Destructor: release the volume-element vectors and both binary search
   // trees. Deleting a null pointer is a no-op, so no guards are required.
   delete fDelta;
   delete fShift;
   delete fBinaryTreeS;
   delete fBinaryTreeB;
}
// Declare the configuration options understood by PDERS together with
// their allowed values:
//   VolumeRangeMode - how the counting volume is sized
//   KernelEstimator - weighting kernel applied to events in the volume
//   plus the numeric tuning parameters of the adaptive/kNN searches.
void TMVA::MethodPDERS::DeclareOptions()
{
DeclareOptionRef(fVolumeRange="Adaptive", "VolumeRangeMode", "Method to determine volume size");
AddPreDefVal(TString("Unscaled"));
AddPreDefVal(TString("MinMax"));
AddPreDefVal(TString("RMS"));
AddPreDefVal(TString("Adaptive"));
AddPreDefVal(TString("kNN"));
DeclareOptionRef(fKernelString="Box", "KernelEstimator", "Kernel estimation function");
AddPreDefVal(TString("Box"));
AddPreDefVal(TString("Sphere"));
AddPreDefVal(TString("Teepee"));
AddPreDefVal(TString("Gauss"));
AddPreDefVal(TString("Sinc3"));
AddPreDefVal(TString("Sinc5"));
AddPreDefVal(TString("Sinc7"));
AddPreDefVal(TString("Sinc9"));
AddPreDefVal(TString("Sinc11"));
AddPreDefVal(TString("Lanczos2"));
AddPreDefVal(TString("Lanczos3"));
AddPreDefVal(TString("Lanczos5"));
AddPreDefVal(TString("Lanczos8"));
AddPreDefVal(TString("Trim"));
// numeric tuning parameters
DeclareOptionRef(fDeltaFrac , "DeltaFrac", "nEventsMin/Max for minmax and rms volume range");
DeclareOptionRef(fNEventsMin , "NEventsMin", "nEventsMin for adaptive volume range");
DeclareOptionRef(fNEventsMax , "NEventsMax", "nEventsMax for adaptive volume range");
DeclareOptionRef(fMaxVIterations, "MaxVIterations", "MaxVIterations for adaptive volume range");
DeclareOptionRef(fInitialScale , "InitialScale", "InitialScale for adaptive volume range");
DeclareOptionRef(fGaussSigma , "GaussSigma", "Width (wrt volume size) of Gaussian kernel estimator");
DeclareOptionRef(fNormTree , "NormTree", "Normalize binary search tree");
}
void TMVA::MethodPDERS::ProcessOptions()
{
   // Translate the parsed option strings into the internal enumerators and
   // report the interpreted configuration. Unknown option values are fatal
   // (the kFATAL logger message aborts).
   MethodBase::ProcessOptions();

   fGaussSigmaNorm = fGaussSigma; // Gaussian kernel width relative to the volume size

   // volume-range determination mode
   fVRangeMode = MethodPDERS::kUnsupported;
   if      (fVolumeRange == "MinMax"   ) fVRangeMode = kMinMax;
   else if (fVolumeRange == "RMS"      ) fVRangeMode = kRMS;
   else if (fVolumeRange == "Adaptive" ) fVRangeMode = kAdaptive;
   else if (fVolumeRange == "Unscaled" ) fVRangeMode = kUnscaled;
   else if (fVolumeRange == "kNN"      ) fVRangeMode = kkNN;
   else {
      fLogger << kFATAL << "VolumeRangeMode parameter '" << fVolumeRange << "' unknown" << Endl;
   }

   // kernel estimator function
   if      (fKernelString == "Box"      ) fKernelEstimator = kBox;
   else if (fKernelString == "Sphere"   ) fKernelEstimator = kSphere;
   else if (fKernelString == "Teepee"   ) fKernelEstimator = kTeepee;
   else if (fKernelString == "Gauss"    ) fKernelEstimator = kGauss;
   else if (fKernelString == "Sinc3"    ) fKernelEstimator = kSinc3;
   else if (fKernelString == "Sinc5"    ) fKernelEstimator = kSinc5;
   else if (fKernelString == "Sinc7"    ) fKernelEstimator = kSinc7;
   else if (fKernelString == "Sinc9"    ) fKernelEstimator = kSinc9;
   else if (fKernelString == "Sinc11"   ) fKernelEstimator = kSinc11;
   else if (fKernelString == "Lanczos2" ) fKernelEstimator = kLanczos2;
   else if (fKernelString == "Lanczos3" ) fKernelEstimator = kLanczos3;
   else if (fKernelString == "Lanczos5" ) fKernelEstimator = kLanczos5;
   else if (fKernelString == "Lanczos8" ) fKernelEstimator = kLanczos8;
   else if (fKernelString == "Trim"     ) fKernelEstimator = kTrim;
   else {
      fLogger << kFATAL << "KernelEstimator parameter '" << fKernelString << "' unknown" << Endl;
   }

   // bug fix: the kNN mode was previously reported as "Adaptive" because the
   // ternary chain had no kkNN case
   fLogger << kVERBOSE << "interpreted option string: vRangeMethod: '"
           << (const char*)((fVRangeMode == kMinMax  ) ? "MinMax"   :
                            (fVRangeMode == kUnscaled) ? "Unscaled" :
                            (fVRangeMode == kRMS     ) ? "RMS"      :
                            (fVRangeMode == kkNN     ) ? "kNN"      : "Adaptive") << "'" << Endl;
   if (fVRangeMode == kMinMax || fVRangeMode == kRMS)
      fLogger << kVERBOSE << "deltaFrac: " << fDeltaFrac << Endl;
   else
      fLogger << kVERBOSE << "nEventsMin/Max, maxVIterations, initialScale: "
              << fNEventsMin << " " << fNEventsMax
              << " " << fMaxVIterations << " " << fInitialScale << Endl;
   fLogger << kVERBOSE << "KernelEstimator = " << fKernelString << Endl;
}
// Training: PDERS has no fitting phase; "training" consists of filling the
// signal and background binary search trees from the training sample and
// preparing the volume element used at evaluation time.
void TMVA::MethodPDERS::Train( void )
{
if (!CheckSanity()) fLogger << kFATAL << "<Train> sanity check failed" << Endl;
// PDERS works on the raw variable ranges; a global "Normalise" transform
// would distort the density estimate, so it is rejected outright
if (IsNormalised()) fLogger << kFATAL << "\"Normalise\" option cannot be used with PDERS; "
<< "please remove the option from the configuration string, or "
<< "use \"!Normalise\""
<< Endl;
CreateBinarySearchTrees( Data().GetTrainingTree() );
CalcAverages();
SetVolumeElement();
fInitializedVolumeEle = kTRUE;
}
// Return the PDERS response for the current event.
// If the volume element was not yet prepared (e.g. method restored from a
// weight file by an older path), finish the initialization lazily.
Double_t TMVA::MethodPDERS::GetMvaValue()
{
if (fInitializedVolumeEle == kFALSE) {
// NOTE(review): the flag is set before the assert/initialization below;
// presumably to avoid re-entering this branch — confirm intended order
fInitializedVolumeEle = kTRUE;
// both trees must exist at this point (built in Train or stream read)
assert( fBinaryTreeS && fBinaryTreeB );
CalcAverages();
SetVolumeElement();
}
return this->RScalc( GetEvent() );
}
// Compute the per-variable average of the signal and background RMS values
// from the binary search trees. Only needed for the range modes that size
// the volume from the sample spread (Adaptive, RMS, kNN).
void TMVA::MethodPDERS::CalcAverages()
{
if (fVRangeMode == kAdaptive || fVRangeMode == kRMS || fVRangeMode == kkNN ) {
fAverageRMS.clear();
fBinaryTreeS->CalcStatistics();
fBinaryTreeB->CalcStatistics();
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
Float_t rmsS = fBinaryTreeS->RMS(Types::kSignal, ivar);
Float_t rmsB = fBinaryTreeB->RMS(Types::kBackground, ivar);
// unweighted mean of the signal and background RMS
fAverageRMS.push_back( (rmsS + rmsB)*0.5 );
}
}
}
void TMVA::MethodPDERS::CreateBinarySearchTrees( TTree* tree )
{
   // Build (or rebuild) the signal and background binary search trees from
   // the given training tree, and compute the normalization scales
   // 1/sum-of-weights for each class.
   assert( tree != 0 );

   // discard any previously built trees
   if (NULL != fBinaryTreeS) delete fBinaryTreeS;
   if (NULL != fBinaryTreeB) delete fBinaryTreeB;
   fBinaryTreeS = new BinarySearchTree();
   fBinaryTreeB = new BinarySearchTree();
   if (fNormTree) {
      fBinaryTreeS->SetNormalize( kTRUE );
      fBinaryTreeB->SetNormalize( kTRUE );
   }

   // fill class 1 (signal) and class 0 (background) events respectively
   fBinaryTreeS->Fill( *this, tree, 1 );
   fBinaryTreeB->Fill( *this, tree, 0 );
   if (fNormTree) {
      fBinaryTreeS->NormalizeTree();
      fBinaryTreeB->NormalizeTree();
   }

   // robustness fix: fail loudly instead of dividing by zero when one of
   // the classes carries no (or zero total) weight
   Double_t sumS = fBinaryTreeS->GetSumOfWeights();
   Double_t sumB = fBinaryTreeB->GetSumOfWeights();
   if (sumS <= 0 || sumB <= 0)
      fLogger << kFATAL << "<CreateBinarySearchTrees> zero sum of weights: "
              << sumS << " (signal) " << sumB << " (background)" << Endl;
   fScaleS = 1.0/sumS;
   fScaleB = 1.0/sumB;
   fLogger << kVERBOSE << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
}
// Define the initial counting volume: per-variable half-widths (fDelta)
// and the relative placement of the test event inside it (fShift = 0.5,
// i.e. the event sits in the center).
void TMVA::MethodPDERS::SetVolumeElement( void )
{
if(GetNvar()<=0) {
fLogger << kFATAL << "GetNvar() <= 0: " << GetNvar() << Endl;
}
// kNN uses the same event-count window as the adaptive mode
fkNNMin = Int_t(fNEventsMin);
fkNNMax = Int_t(fNEventsMax);
if(fDelta) delete fDelta;
if(fShift) delete fShift;
fDelta = new std::vector<Float_t>( GetNvar() );
fShift = new std::vector<Float_t>( GetNvar() );
switch (fVRangeMode) {
case kRMS:
case kkNN:
case kAdaptive:
// volume size proportional to the average class RMS (from CalcAverages)
if ((Int_t)fAverageRMS.size() != GetNvar())
fLogger << kFATAL << "<SetVolumeElement> RMS not computed: " << fAverageRMS.size() << Endl;
for (Int_t ivar=0; ivar<GetNvar(); ivar++)
{
(*fDelta)[ivar] = fAverageRMS[ivar]*fDeltaFrac;
fLogger << kVERBOSE << "delta of var[" << (*fInputVars)[ivar]
<< "\t]: " << fAverageRMS[ivar]
<< "\t | comp with |max - min|: " << (GetXmax( ivar ) - GetXmin( ivar ))
<< Endl;
}
break;
case kMinMax:
// volume size proportional to the full variable range
for (Int_t ivar=0; ivar<GetNvar(); ivar++)
(*fDelta)[ivar] = (GetXmax( ivar ) - GetXmin( ivar ))*fDeltaFrac;
break;
case kUnscaled:
// fixed volume size in raw variable units
for (Int_t ivar=0; ivar<GetNvar(); ivar++)
(*fDelta)[ivar] = fDeltaFrac;
break;
default:
fLogger << kFATAL << "<SetVolumeElement> unknown range-set mode: "
<< fVRangeMode << Endl;
}
// center the volume on the test event
for (Int_t ivar=0; ivar<GetNvar(); ivar++)
(*fShift)[ivar] = 0.5;
}
// Static interface used as the RootFinder callback: forwards to the
// current instance via the fgThisPDERS back-pointer (set by UpdateThis()).
Double_t TMVA::MethodPDERS::IGetVolumeContentForRoot( Double_t scale )
{
return ThisPDERS()->GetVolumeContentForRoot( scale );
}
// Return the total (signal + background) event count inside the help
// volume rescaled by "scale". Used by the root finder to solve for the
// scale that yields the target event count.
Double_t TMVA::MethodPDERS::GetVolumeContentForRoot( Double_t scale )
{
// work on a scaled copy so fHelpVolume itself stays untouched
Volume v( *fHelpVolume );
v.ScaleInterval( scale );
Double_t cS = GetBinaryTreeSig()->SearchVolume( &v );
Double_t cB = GetBinaryTreeBkg()->SearchVolume( &v );
v.Delete();
return cS + cB;
}
Float_t TMVA::MethodPDERS::RScalc( const Event& e )
{
   // Core of PDE-RS: determine the (kernel-weighted) signal and background
   // event counts in a volume around the test event "e", according to the
   // configured volume-range mode, and return the signal probability
   //    R = 1 / (1 + (countB*fScaleB)/(countS*fScaleS)).
   //
   // Fixes w.r.t. previous revision:
   //  - the "dim_normalization" array in the kNN branch was allocated with
   //    new[] but never freed (memory leak on every evaluated event)
   //  - typos in the kNN warning message ("warining", missing bracket)

   // build the volume boundaries centered (via fShift) on the test event
   std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
   for (Int_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetVal(ivar);
   std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
   for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
      (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
      (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
   }
   Volume *volume = new Volume( lb, ub );

   Float_t countS = 0;
   Float_t countB = 0;

#ifdef TMVA_MethodPDERS__countByHand__Debug__
   // debug cross-check: brute-force counting against the tree search
   countS = fBinaryTreeS->SearchVolume( volume );
   countB = fBinaryTreeB->SearchVolume( volume );
   Int_t iS = 0, iB = 0;
   for (Int_t ievt_=0; ievt_<Data().GetNEvtTrain(); ievt_++) {
      Data().ReadTrainEvent(ievt_);
      Bool_t inV = kFALSE;
      for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
         Float_t x = GetEventVal(ivar);
         inV = (x > (*volume->Lower)[ivar] && x <= (*volume->Upper)[ivar]);
         if (!inV) break;
      }
      if (inV) {
         if (IsSignalEvent()) iS++;
         else iB++;
      }
   }
   fLogger << kVERBOSE << "debug: my test: S/B: " << iS << " " << iB << Endl;
   fLogger << kVERBOSE << "debug: binTree: S/B: " << countS << " " << countB << Endl << Endl;
#endif

   if (fVRangeMode == kRMS || fVRangeMode == kMinMax || fVRangeMode == kUnscaled ) {
      // fixed-size volume: a single range search suffices
      std::vector<const BinarySearchTreeNode*> eventsS;
      std::vector<const BinarySearchTreeNode*> eventsB;
      fBinaryTreeS->SearchVolume( volume, &eventsS );
      fBinaryTreeB->SearchVolume( volume, &eventsB );
      countS = KernelEstimate( e, eventsS, *volume );
      countB = KernelEstimate( e, eventsB, *volume );
   }
   else if (fVRangeMode == kAdaptive) {
      // adaptive mode: rescale the volume until the contained event count
      // lies within [fNEventsMin, fNEventsMax]
      if (MethodPDERS_UseFindRoot) {
         // solve numerically for the scale that yields the target count
         fHelpVolume = volume;
         UpdateThis(); // the static callback reaches us through fgThisPDERS
         RootFinder rootFinder( &IGetVolumeContentForRoot, 0.01, 50, 200, 10 );
         Double_t scale = rootFinder.Root( (fNEventsMin + fNEventsMax)/2.0 );
         Volume v( *volume );
         v.ScaleInterval( scale );
         std::vector<const BinarySearchTreeNode*> eventsS;
         std::vector<const BinarySearchTreeNode*> eventsB;
         fBinaryTreeS->SearchVolume( &v, &eventsS );
         fBinaryTreeB->SearchVolume( &v, &eventsB );
         countS = KernelEstimate( e, eventsS, v );
         countB = KernelEstimate( e, eventsB, v );
         v.Delete();
         fHelpVolume = NULL;
      }
      else {
         // iterative (secant-like) adjustment of the volume scale
         countS = fBinaryTreeS->SearchVolume( volume );
         countB = fBinaryTreeB->SearchVolume( volume );
         Float_t nEventsO = countS + countB;
         Int_t i_=0;
         // pre-adjustment: grow the volume until the minimum count is reached
         // NOTE(review): this loop has no iteration cap; it relies on the
         // training sample containing at least fNEventsMin events
         while (nEventsO < fNEventsMin) {
            volume->ScaleInterval( 1.15 );
            countS = fBinaryTreeS->SearchVolume( volume );
            countB = fBinaryTreeB->SearchVolume( volume );
            nEventsO = countS + countB;
            i_++;
         }
         if (i_ > 50) fLogger << kWARNING << "warning in event: " << e
                              << ": adaptive volume pre-adjustment reached "
                              << ">50 iterations in while loop (" << i_ << ")" << Endl;

         Float_t nEventsN = nEventsO;
         Float_t nEventsE = 0.5*(fNEventsMin + fNEventsMax); // target count
         Float_t scaleO = 1.0;
         Float_t scaleN = fInitialScale;
         Float_t scale = scaleN;
         Float_t cS = countS;
         Float_t cB = countB;
         for (Int_t ic=1; ic<fMaxVIterations; ic++) {
            if (nEventsN < fNEventsMin || nEventsN > fNEventsMax) {
               Volume* v = new Volume( *volume );
               v->ScaleInterval( scale );
               cS = fBinaryTreeS->SearchVolume( v );
               cB = fBinaryTreeB->SearchVolume( v );
               nEventsN = cS + cB;
               // secant step towards the target count; fall back to fixed
               // steps when the secant slope is undefined
               if (nEventsN > 1 && nEventsN - nEventsO != 0)
                  if (scaleN - scaleO != 0)
                     scale += (scaleN - scaleO)/(nEventsN - nEventsO)*(nEventsE - nEventsN);
                  else
                     scale += (-0.01);
               else
                  scale += 0.5;
               scaleN = scale;
               // keep the best counts found so far (closest to the target,
               // preferring volumes that satisfy the minimum count)
               if (TMath::Abs(cS + cB - nEventsE) < TMath::Abs(countS + countB - nEventsE) &&
                   (cS + cB >= fNEventsMin || countS + countB < cS + cB)) {
                  countS = cS; countB = cB;
               }
               v->Delete();
               delete v;
            }
            else break;
         }
         nEventsN = countS + countB;
         if (nEventsN < fNEventsMin-1 || nEventsN > fNEventsMax+1)
            fLogger << kWARNING << "warning in event " << e
                    << ": adaptive volume adjustment reached "
                    << "max. #iterations (" << fMaxVIterations << ")"
                    << "[ nEvents: " << nEventsN << " " << fNEventsMin << " " << fNEventsMax << "]"
                    << Endl;
      }
   }
   else if (fVRangeMode == kkNN)
   {
      // k-nearest-neighbour mode: rescale the volume until it contains
      // between fkNNMin and fkNNMax events, then keep the fkNNMin closest
      std::vector< const BinarySearchTreeNode* > eventsS;
      std::vector< const BinarySearchTreeNode* > eventsB;
      Volume v(*volume);
      Int_t kNNcountS = fBinaryTreeS->SearchVolumeWithMaxLimit( &v, &eventsS, fkNNMax+1 );
      Int_t kNNcountB = fBinaryTreeB->SearchVolumeWithMaxLimit( &v, &eventsB, fkNNMax+1 );
      Int_t t_times = 0;
      while ( !(kNNcountS+kNNcountB >= fkNNMin && kNNcountS+kNNcountB <= fkNNMax) ) {
         if (kNNcountS+kNNcountB < fkNNMin) {
            Float_t scale = 2;    // grow
            volume->ScaleInterval( scale );
         }
         else if (kNNcountS+kNNcountB > fkNNMax) {
            Float_t scale = 0.1;  // shrink
            volume->ScaleInterval( scale );
         }
         eventsS.clear();
         eventsB.clear();
         v = *volume ;
         kNNcountS = fBinaryTreeS->SearchVolumeWithMaxLimit( &v, &eventsS, fkNNMax+1 );
         kNNcountB = fBinaryTreeB->SearchVolumeWithMaxLimit( &v, &eventsB, fkNNMax+1 );
         t_times++;
         if (t_times == fMaxVIterations) {
            fLogger << kWARNING << "warning in event " << e
                    << ": kNN volume adjustment reached "
                    << "max. #iterations (" << fMaxVIterations << ")"
                    << "[ kNN: " << fkNNMin << " " << fkNNMax << " ]" << Endl;
            break;
         }
      }
      // per-dimension normalization so distances are comparable across axes
      Double_t *dim_normalization = new Double_t[GetNvar()];
      for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
         dim_normalization [ivar] = 1.0 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
      }
      std::vector<const BinarySearchTreeNode*> tempVectorS;
      std::vector<const BinarySearchTreeNode*> tempVectorB;
      if (kNNcountS+kNNcountB >= fkNNMin) {
         // select the fkNNMin nearest events (by normalized distance)
         std::vector<Double_t> *distances = new std::vector<Double_t>( kNNcountS+kNNcountB );
         for (Int_t j=0;j< Int_t(eventsS.size()) ;j++)
            (*distances)[j] = GetNormalizedDistance ( e, *eventsS[j], dim_normalization );
         for (Int_t j=0;j< Int_t(eventsB.size()) ;j++)
            (*distances)[j + Int_t(eventsS.size())] = GetNormalizedDistance( e, *eventsB[j], dim_normalization );
         // find the fkNNMin-th smallest distance (partial sort)
         std::vector<Double_t>::iterator wsk = distances->begin();
         for (Int_t j=0;j<fkNNMin-1;j++) wsk++;
         std::nth_element( distances->begin(), wsk, distances->end() );
         for (Int_t j=0;j<Int_t(eventsS.size());j++) {
            Double_t dist = GetNormalizedDistance( e, *eventsS[j], dim_normalization );
            if (dist <= (*distances)[fkNNMin-1])
               tempVectorS.push_back( eventsS[j] );
         }
         for (Int_t j=0;j<Int_t(eventsB.size());j++) {
            Double_t dist = GetNormalizedDistance( e, *eventsB[j], dim_normalization );
            if (dist <= (*distances)[fkNNMin-1]) tempVectorB.push_back( eventsB[j] );
         }
         fMax_distance = (*distances)[fkNNMin-1]; // used by the Trim kernel
         delete distances;
      }
      delete [] dim_normalization; // bug fix: was leaked (new[] without delete[])
      countS = KernelEstimate( e, tempVectorS, v );
      countB = KernelEstimate( e, tempVectorB, v );
   }
   else {
      fLogger << kFATAL << "<RScalc> unknown RangeMode: " << fVRangeMode << Endl;
   }

   delete volume;
   delete lb;
   delete ub;

   // degenerate counts: no events at all -> undecided; one class empty ->
   // saturate the response
   if (countS < 1e-20 && countB < 1e-20) return 0.5;
   if (countB < 1e-20) return 1.0;
   if (countS < 1e-20) return 0.0;
   Float_t r = countB*fScaleB/(countS*fScaleS);
   return 1.0/(r + 1.0);
}
Double_t TMVA::MethodPDERS::KernelEstimate( const Event & event,
                                            std::vector<const BinarySearchTreeNode*>& events, Volume& v )
{
   // Kernel-weighted sum of the event weights in "events", where each
   // event's weight is multiplied by the kernel evaluated at its
   // normalized distance from the test event. The per-dimension factor 2
   // maps the volume half-width onto a unit distance.
   Double_t pdfSum = 0;
   Double_t *dim_normalization = new Double_t[GetNvar()];
   for (Int_t ivar=0; ivar<GetNvar(); ivar++)
      dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
   for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); iev++) {
      Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
      pdfSum += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
   }
   delete [] dim_normalization; // bug fix: was leaked (new[] without delete[])
   // clamp negative sums (possible for oscillating kernels) to zero
   return KernelNormalization( pdfSum < 0. ? 0. : pdfSum );
}
Double_t TMVA::MethodPDERS::ApplyKernelFunction (Double_t normalized_distance)
{
   // Evaluate the configured kernel estimator at the given normalized
   // distance (1 corresponds to the volume boundary).
   switch (fKernelEstimator) {
   case kBox:
   case kSphere:
      // uniform weight inside the volume (plain counting)
      return 1;
   case kTeepee:
      // linearly decreasing weight
      return 1 - normalized_distance;
   case kGauss:
      // Gaussian of width fGaussSigmaNorm, not area-normalized here
      return TMath::Gaus( normalized_distance, 0, fGaussSigmaNorm, kFALSE );
   case kSinc3:
   case kSinc5:
   case kSinc7:
   case kSinc9:
   case kSinc11: {
      // the enumerator offset from kSinc3 encodes the number of side
      // crossings: kSinc3 -> 2, kSinc5 -> 3, ...
      const Double_t nCrossings = 2 + ((int) fKernelEstimator) - ((int) kSinc3);
      return NormSinc( nCrossings * normalized_distance );
   }
   case kLanczos2: return LanczosFilter( 2, normalized_distance );
   case kLanczos3: return LanczosFilter( 3, normalized_distance );
   case kLanczos5: return LanczosFilter( 5, normalized_distance );
   case kLanczos8: return LanczosFilter( 8, normalized_distance );
   case kTrim: {
      // tricube kernel; fMax_distance is set during the kNN volume search
      Double_t u = normalized_distance / fMax_distance;
      u = 1 - u*u*u;
      return u*u*u;
   }
   default:
      fLogger << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
      break;
   }
   return 0;
}
// Scale the kernel sum by a kernel-dependent normalization factor, cached
// in a function-local static.
//
// NOTE(review): "ret" is initialized to 1.0 and the early return fires
// whenever ret != 0.0, so the switch and the final rescaling below are
// never executed — the function effectively returns pdf unchanged. This
// looks like the normalization was deliberately jammed off, but confirm;
// also note the static cache is neither per-kernel nor thread-safe.
Double_t TMVA::MethodPDERS::KernelNormalization (Double_t pdf)
{
static Double_t ret = 1.0;
if (ret != 0.0) return ret*pdf;
// ---- dead code below (see note above) ----
switch (fKernelEstimator) {
case kBox:
case kSphere:
ret = 1.;
break;
case kTeepee:
// analytic integral of the teepee kernel over the unit sphere
ret = (GetNvar() * (GetNvar() + 1) * TMath::Gamma (((Double_t) GetNvar()) / 2.)) /
( TMath::Power (2., (Double_t) GetNvar() + 1) * TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.));
break;
case kGauss:
// multivariate Gaussian normalization
ret = 1. / TMath::Power ( 2 * TMath::Pi() * fGaussSigmaNorm * fGaussSigmaNorm, ((Double_t) GetNvar()) / 2.);
break;
case kSinc3:
case kSinc5:
case kSinc7:
case kSinc9:
case kSinc11:
case kLanczos2:
case kLanczos3:
case kLanczos5:
case kLanczos8:
ret = 1 / TMath::Power ( 2., (Double_t) GetNvar() );
break;
default:
fLogger << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
}
// rescale from the unit hypercube to the unit hypersphere volume
ret *= ( TMath::Power (2., GetNvar()) * TMath::Gamma (1 + (((Double_t) GetNvar()) / 2.)) ) /
TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.);
return ret*pdf;
}
Double_t TMVA::MethodPDERS::GetNormalizedDistance ( const Event &base_event,
                                                    const BinarySearchTreeNode &sample_event,
                                                    Double_t *dim_normalization)
{
   // Euclidean distance between sample_event and base_event, with each
   // coordinate difference rescaled by its per-dimension normalization
   // factor.
   Double_t sum2 = 0;
   for (Int_t ivar = 0; ivar < GetNvar(); ivar++) {
      const Double_t d = dim_normalization[ivar] * (sample_event.GetEventV()[ivar] - base_event.GetVal(ivar));
      sum2 += d*d;
   }
   return TMath::Sqrt( sum2 );
}
Double_t TMVA::MethodPDERS::NormSinc (Double_t x)
{
   // Normalized sinc kernel sin(pi x)/(pi x), raised to the power of the
   // number of input variables. For an even variable count the absolute
   // value of one factor keeps the result non-negative.
   if (TMath::Abs(x) < 10e-10) return 1; // limit: sinc(0) = 1

   const Double_t pix  = TMath::Pi() * x;
   const Double_t sinc = TMath::Sin(pix) / pix;

   return (GetNvar() % 2) ? TMath::Power( sinc, GetNvar() )
                          : TMath::Abs( sinc ) * TMath::Power( sinc, GetNvar() - 1 );
}
Double_t TMVA::MethodPDERS::LanczosFilter (Int_t level, Double_t x)
{
   // Lanczos windowed-sinc kernel of the given level:
   // sinc(pi x) * sinc(pi x * level), raised to the dimensionality with
   // the same even/odd sign handling as NormSinc.
   if (TMath::Abs(x) < 10e-10) return 1; // limit at x = 0

   const Double_t pix       = TMath::Pi() * x;
   const Double_t pixtimesn = pix * ((Double_t) level);
   const Double_t lanczos   = (TMath::Sin(pix) / pix) * (TMath::Sin(pixtimesn) / pixtimesn);

   return (GetNvar() % 2) ? TMath::Power( lanczos, GetNvar() )
                          : TMath::Abs( lanczos ) * TMath::Power( lanczos, GetNvar() - 1 );
}
Float_t TMVA::MethodPDERS::GetError( Float_t countS, Float_t countB,
                                     Float_t sumW2S, Float_t sumW2B ) const
{
   // Statistical error estimate on the classifier response, propagated
   // from the squared-weight sums of the two classes. Degenerate
   // denominators or vanishing errors return 1.
   const Float_t c = fScaleB/fScaleS;

   Float_t d = countS + c*countB;
   d *= d; // (S + c*B)^2
   if (d < 1e-10) return 1; // no events in the volume

   const Float_t f   = c*c/d/d;
   const Float_t err = f*countB*countB*sumW2S + f*countS*countS*sumW2B;

   return (err < 1e-10) ? 1 : sqrt(err);
}
// Write the method "weights" (i.e. both binary search trees) to the given
// text stream. In non-text mode only a pointer to the ROOT weight file is
// written.
void TMVA::MethodPDERS::WriteWeightsToStream( ostream& o ) const
{
if (TxtWeightsOnly()) {
if (fBinaryTreeS)
o << *fBinaryTreeS;
else
fLogger << kFATAL << "Signal binary search tree not available" << Endl;
if (fBinaryTreeB)
o << *fBinaryTreeB;
else
fLogger << kFATAL << "Background binary search tree not available" << Endl;
}
else {
// weights live in the companion .root file; record its name only
TString rfname( GetWeightFileName() ); rfname.ReplaceAll( ".txt", ".root" );
o << "# weights stored in root i/o file: " << rfname << endl;
}
}
// Restore the method from a text weight stream: rebuild both binary search
// trees, recompute their statistics and normalization scales, and finish
// the volume-element initialization so the method is ready for evaluation.
void TMVA::MethodPDERS::ReadWeightsFromStream( istream& istr)
{
if (TxtWeightsOnly()) {
if (NULL != fBinaryTreeS) delete fBinaryTreeS;
if (NULL != fBinaryTreeB) delete fBinaryTreeB;
fBinaryTreeS = new BinarySearchTree();
fBinaryTreeB = new BinarySearchTree();
istr >> *fBinaryTreeS >> *fBinaryTreeB;
// periode = number of input variables used for the tree's cycling axis
fBinaryTreeS->SetPeriode( GetVarTransform().Variables().size() );
fBinaryTreeB->SetPeriode( GetVarTransform().Variables().size() );
fBinaryTreeS->CalcStatistics();
fBinaryTreeB->CalcStatistics();
fBinaryTreeS->CountNodes();
fBinaryTreeB->CountNodes();
// normalization scales = 1 / total class weight
fScaleS = 1.0/fBinaryTreeS->GetSumOfWeights();
fScaleB = 1.0/fBinaryTreeB->GetSumOfWeights();
fLogger << kVERBOSE << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
CalcAverages();
SetVolumeElement();
fInitializedVolumeEle = kTRUE;
}
}
// Write the trees to a ROOT file: intentionally empty — text-stream I/O is
// the only supported persistence path for this method.
void TMVA::MethodPDERS::WriteWeightsToStream( TFile& ) const
{
}
// Read the trees from a ROOT file: intentionally empty — see the istream
// overload for the supported persistence path.
void TMVA::MethodPDERS::ReadWeightsFromStream( TFile& )
{
}
// Write the method-specific part of the standalone response class:
// PDERS cannot be exported to standalone C++, so only a stub is emitted.
void TMVA::MethodPDERS::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
fout << " // not implemented for class: \"" << className << "\"" << endl;
fout << "};" << endl;
}
// Print a help message describing the PDERS classifier, its strengths and
// the configuration options relevant for performance tuning.
void TMVA::MethodPDERS::GetHelpMessage() const
{
fLogger << Endl;
fLogger << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
fLogger << Endl;
fLogger << "PDERS is a generalization of the projective likelihood classifier " << Endl;
fLogger << "to N dimensions, where N is the number of input variables used." << Endl;
fLogger << "In its adaptive form it is mostly equivalent to k-Nearest-Neighbor" << Endl;
fLogger << "(k-NN) methods. If the multidimensional PDF for signal and background" << Endl;
fLogger << "were known, this classifier would exploit the full information" << Endl;
fLogger << "contained in the input variables, and would hence be optimal. In " << Endl;
fLogger << "practice however, huge training samples are necessary to sufficiently " << Endl;
fLogger << "populate the multidimensional phase space. " << Endl;
fLogger << Endl;
fLogger << "The simplest implementation of PDERS counts the number of signal" << Endl;
fLogger << "and background events in the vicinity of a test event, and returns" << Endl;
fLogger << "a weight according to the majority species of the neighboring events." << Endl;
fLogger << "A more involved version of PDERS (selected by the option \"KernelEstimator\")" << Endl;
fLogger << "uses Kernel estimation methods to approximate the shape of the PDF." << Endl;
fLogger << Endl;
fLogger << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
fLogger << Endl;
fLogger << "PDERS can be very powerful in case of strongly non-linear problems, " << Endl;
fLogger << "e.g., distinct islands of signal and background regions. Because of " << Endl;
fLogger << "the exponential growth of the phase space, it is important to restrict" << Endl;
fLogger << "the number of input variables (dimension) to the strictly necessary." << Endl;
fLogger << Endl;
fLogger << "Note that PDERS is a slowly responding classifier. Moreover, the necessity" << Endl;
fLogger << "to store the entire binary tree in memory, to avoid accessing virtual " << Endl;
fLogger << "memory, limits the number of training events that can effectively be " << Endl;
fLogger << "used to model the multidimensional PDF." << Endl;
fLogger << Endl;
fLogger << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
fLogger << Endl;
fLogger << "If the PDERS response is found too slow when using the adaptive volume " << Endl;
fLogger << "size (option \"VolumeRangeMode=Adaptive\"), it might be found beneficial" << Endl;
fLogger << "to reduce the number of events required in the volume, and/or to enlarge" << Endl;
fLogger << "the allowed range (\"NeventsMin/Max\"). PDERS is relatively insensitive" << Endl;
fLogger << "to the width (\"GaussSigma\") of the Gaussian kernel (if used)." << Endl;
}
// Last change: Sat Nov 1 10:21:53 2008
// Last generated: 2008-11-01 10:21
// (auto-generated ROOT documentation footer, accidentally included as bare
// text; commented out so the translation unit remains valid C++)