#include <iomanip>
#include <cassert>
#include "TMath.h"
#include "Riostream.h"
#include "TMVA/MethodFisher.h"
#include "TMVA/Tools.h"
#include "TMatrix.h"
#include "TMVA/Ranking.h"
ClassImp(TMVA::MethodFisher)
TMVA::MethodFisher::MethodFisher( const TString& jobName, const TString& methodTitle, DataSet& theData,
const TString& theOption, TDirectory* theTargetDir )
: TMVA::MethodBase( jobName, methodTitle, theData, theOption, theTargetDir )
{
InitFisher();
SetConfigName( TString("Method") + GetMethodName() );
DeclareOptions();
ParseOptions();
ProcessOptions();
if (HasTrainingTree()) InitMatrices();
}
TMVA::MethodFisher::MethodFisher( DataSet& theData,
const TString& theWeightFile,
TDirectory* theTargetDir )
: TMVA::MethodBase( theData, theWeightFile, theTargetDir )
{
InitFisher();
DeclareOptions();
}
void TMVA::MethodFisher::InitFisher( void )
{
SetMethodName( "Fisher" );
SetMethodType( TMVA::Types::kFisher );
SetTestvarName();
fMeanMatx = 0;
fBetw = 0;
fWith = 0;
fCov = 0;
fDiscrimPow = 0;
fSumOfWeightsS = fSumOfWeightsB = 0;
fF0 = 0;
fFisherCoeff = new std::vector<Double_t>( GetNvar() );
SetSignalReferenceCut( 0.0 );
}
void TMVA::MethodFisher::DeclareOptions()
{
DeclareOptionRef( fTheMethod = "Fisher", "Method", "Discrimination method" );
AddPreDefVal(TString("Fisher"));
AddPreDefVal(TString("Mahalanobis"));
}
void TMVA::MethodFisher::ProcessOptions()
{
MethodBase::ProcessOptions();
if (fTheMethod == "Fisher" ) fFisherMethod = kFisher;
else fFisherMethod = kMahalanobis;
CheckForUnusedOptions();
}
TMVA::MethodFisher::~MethodFisher( void )
{
   if (fMeanMatx   ) delete fMeanMatx;   // allocated in InitMatrices and owned by this class
   if (fBetw       ) delete fBetw;
   if (fWith       ) delete fWith;
   if (fCov        ) delete fCov;
   if (fDiscrimPow ) delete fDiscrimPow;
   if (fFisherCoeff) delete fFisherCoeff;
}
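// training: the Fisher coefficients are computed in successive steps --
// class means, within- and between-class covariance matrices, the
// coefficients themselves, and finally the per-variable discrimination power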
void TMVA::MethodFisher::Train( void )
{
if (!CheckSanity()) fLogger << kFATAL << "<Train> sanity check failed" << Endl;
GetMean();
GetCov_WithinClass();
GetCov_BetweenClass();
GetCov_Full();
GetFisherCoeff();
GetDiscrimPower();
PrintCoefficients();
}
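// returns the Fisher discriminant for the current event:
// y(x) = F_0 + Sum_i F_i * x_i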
Double_t TMVA::MethodFisher::GetMvaValue()
{
Double_t result = fF0;
for (Int_t ivar=0; ivar<GetNvar(); ivar++) result += (*fFisherCoeff)[ivar]*GetEventVal(ivar);
return result;
}
void TMVA::MethodFisher::InitMatrices( void )
{
if (!HasTrainingTree()) {
fLogger << kFATAL << "<InitMatrices> fatal error: Data().TrainingTree() is zero pointer" << Endl;
}
fMeanMatx = new TMatrixD( GetNvar(), 3 );
fBetw = new TMatrixD( GetNvar(), GetNvar() );
fWith = new TMatrixD( GetNvar(), GetNvar() );
fCov = new TMatrixD( GetNvar(), GetNvar() );
fDiscrimPow = new std::vector<Double_t>( GetNvar() );
}
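// computes the weighted means of all input variables; column convention of
// fMeanMatx: column 0 = signal mean, column 1 = background mean,
// column 2 = overall (signal + background) mean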
void TMVA::MethodFisher::GetMean( void )
{
Double_t *sumS = new Double_t[(Int_t)GetNvar()];
Double_t *sumB = new Double_t[(Int_t)GetNvar()];
for (Int_t ivar=0; ivar<GetNvar(); ivar++) { sumS[ivar] = sumB[ivar] = 0; }
fSumOfWeightsS = 0;
fSumOfWeightsB = 0;
for (Int_t ievt=0; ievt<Data().GetNEvtTrain(); ievt++) {
ReadTrainingEvent(ievt);
Double_t weight = GetEventWeight();
if (IsSignalEvent()) fSumOfWeightsS += weight;
else fSumOfWeightsB += weight;
Double_t* sum = IsSignalEvent() ? sumS : sumB;
for (Int_t ivar=0; ivar<GetNvar(); ivar++) sum[ivar] += GetEventVal( ivar )*weight;
}
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
   (*fMeanMatx)( ivar, 0 ) = sumS[ivar]/fSumOfWeightsS;  // signal mean
   (*fMeanMatx)( ivar, 1 ) = sumB[ivar]/fSumOfWeightsB;  // background mean
   (*fMeanMatx)( ivar, 2 ) = (sumS[ivar] + sumB[ivar])/(fSumOfWeightsS + fSumOfWeightsB);  // overall mean
}
delete [] sumS;
delete [] sumB;
}
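// the within-class covariance matrix W averages the covariance of each class
// about its own mean:
//
//    W_xy = ( Sum_{i in S} w_i*(x_i - mu_S,x)*(y_i - mu_S,y)
//           + Sum_{i in B} w_i*(x_i - mu_B,x)*(y_i - mu_B,y) ) / (W_S + W_B) ,
//
// with W_S, W_B the sums of signal and background weights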
void TMVA::MethodFisher::GetCov_WithinClass( void )
{
assert( fSumOfWeightsS > 0 && fSumOfWeightsB > 0 );
const Int_t nvar = GetNvar();
const Int_t nvar2 = nvar*nvar;
Double_t *sumSig = new Double_t[nvar2];
Double_t *sumBgd = new Double_t[nvar2];
Double_t *xval = new Double_t[nvar];
memset(sumSig,0,nvar2*sizeof(Double_t));
memset(sumBgd,0,nvar2*sizeof(Double_t));
for (Int_t ievt=0; ievt<Data().GetNEvtTrain(); ievt++) {
ReadTrainingEvent(ievt);
Double_t weight = GetEventWeight();
for (Int_t x=0; x<nvar; x++) xval[x] = GetEventVal( x );
Int_t k=0;
for (Int_t x=0; x<nvar; x++) {
   for (Int_t y=0; y<nvar; y++) {
      // deviations must be taken from the mean of the event's own class
      // (column 0 = signal mean, column 1 = background mean)
      if (IsSignalEvent()) {
         Double_t v = ( (xval[x] - (*fMeanMatx)(x, 0))*(xval[y] - (*fMeanMatx)(y, 0)) )*weight;
         sumSig[k] += v;
      }
      else {
         Double_t v = ( (xval[x] - (*fMeanMatx)(x, 1))*(xval[y] - (*fMeanMatx)(y, 1)) )*weight;
         sumBgd[k] += v;
      }
      k++;
   }
}
}
Int_t k=0;
for (Int_t x=0; x<nvar; x++) {
for (Int_t y=0; y<nvar; y++) {
(*fWith)(x, y) = (sumSig[k] + sumBgd[k])/(fSumOfWeightsS + fSumOfWeightsB);
k++;
}
}
delete [] sumSig;
delete [] sumBgd;
delete [] xval;
}
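// the between-class matrix B measures how far the class means lie from the
// overall mean:
//
//    B_xy = ( W_S*(mu_S,x - mu_x)*(mu_S,y - mu_y)
//           + W_B*(mu_B,x - mu_x)*(mu_B,y - mu_y) ) / (W_S + W_B)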
void TMVA::MethodFisher::GetCov_BetweenClass( void )
{
assert( fSumOfWeightsS > 0 && fSumOfWeightsB > 0);
Double_t prodSig, prodBgd;
for (Int_t x=0; x<GetNvar(); x++) {
for (Int_t y=0; y<GetNvar(); y++) {
prodSig = ( ((*fMeanMatx)(x, 0) - (*fMeanMatx)(x, 2))*
((*fMeanMatx)(y, 0) - (*fMeanMatx)(y, 2)) );
prodBgd = ( ((*fMeanMatx)(x, 1) - (*fMeanMatx)(x, 2))*
((*fMeanMatx)(y, 1) - (*fMeanMatx)(y, 2)) );
(*fBetw)(x, y) = (fSumOfWeightsS*prodSig + fSumOfWeightsB*prodBgd) / (fSumOfWeightsS + fSumOfWeightsB);
}
}
}
void TMVA::MethodFisher::GetCov_Full( void )
{
for (Int_t x=0; x<GetNvar(); x++)
for (Int_t y=0; y<GetNvar(); y++)
(*fCov)(x, y) = (*fWith)(x, y) + (*fBetw)(x, y);
}
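// the Fisher coefficients follow from inverting the within-class matrix W
// (option "Fisher") or the full covariance matrix C = W + B (option
// "Mahalanobis") and applying it to the difference of the class means:
//
//    F_i = sqrt(W_S*W_B)/(W_S + W_B) * Sum_j (M^-1)_ij * (mu_S,j - mu_B,j) ,
//
// the offset F_0 centres the discriminant halfway between the projected
// class means, so that y(x) = 0 marks the decision boundary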
void TMVA::MethodFisher::GetFisherCoeff( void )
{
assert( fSumOfWeightsS > 0 && fSumOfWeightsB > 0);
TMatrixD* theMat = 0;
switch (GetFisherMethod()) {
case kFisher:
theMat = fWith;
break;
case kMahalanobis:
theMat = fCov;
break;
default:
fLogger << kFATAL << "<GetFisherCoeff> undefined method" << GetFisherMethod() << Endl;
}
TMatrixD invCov( *theMat );
if ( TMath::Abs(invCov.Determinant()) < 10E-24 ) {
fLogger << kWARNING << "<GetFisherCoeff> matrix is almost singular with deterninant="
<< TMath::Abs(invCov.Determinant())
<< " did you use the variables that are linear combinations or highly correlated?"
<< Endl;
}
if ( TMath::Abs(invCov.Determinant()) < 10E-120 ) {
fLogger << kFATAL << "<GetFisherCoeff> matrix is singular with determinant="
<< TMath::Abs(invCov.Determinant())
<< " did you use the variables that are linear combinations?"
<< Endl;
}
invCov.Invert();
Double_t xfact = TMath::Sqrt( fSumOfWeightsS*fSumOfWeightsB ) / (fSumOfWeightsS + fSumOfWeightsB);
std::vector<Double_t> diffMeans( GetNvar() );
Int_t ivar, jvar;
for (ivar=0; ivar<GetNvar(); ivar++) {
(*fFisherCoeff)[ivar] = 0;
for (jvar=0; jvar<GetNvar(); jvar++) {
Double_t d = (*fMeanMatx)(jvar, 0) - (*fMeanMatx)(jvar, 1);
(*fFisherCoeff)[ivar] += invCov(ivar, jvar)*d;
}
(*fFisherCoeff)[ivar] *= xfact;
}
fF0 = 0.0;
for (ivar=0; ivar<GetNvar(); ivar++){
fF0 += (*fFisherCoeff)[ivar]*((*fMeanMatx)(ivar, 0) + (*fMeanMatx)(ivar, 1));
}
fF0 /= -2.0;
}
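// the "discrimination power" of a single variable is the ratio of its
// between-class to its full variance, B_ii / C_ii (set to zero if C_ii
// vanishes); it is used for the variable ranking in CreateRanking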
void TMVA::MethodFisher::GetDiscrimPower( void )
{
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
if ((*fCov)(ivar, ivar) != 0)
(*fDiscrimPow)[ivar] = (*fBetw)(ivar, ivar)/(*fCov)(ivar, ivar);
else
(*fDiscrimPow)[ivar] = 0;
}
}
const TMVA::Ranking* TMVA::MethodFisher::CreateRanking()
{
fRanking = new Ranking( GetName(), "Discr. power" );
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
fRanking->AddRank( *new Rank( GetInputExp(ivar), (*fDiscrimPow)[ivar] ) );
}
return fRanking;
}
void TMVA::MethodFisher::PrintCoefficients( void )
{
fLogger << kINFO << "Results for Fisher coefficients:" << Endl;
if (GetVariableTransform() != Types::kNone) {
fLogger << kINFO << "NOTE: The coefficients must be applied to TRANFORMED variables" << Endl;
fLogger << kINFO << " Name of the transformation: \"" << GetVarTransform().GetName() << "\"" << Endl;
}
std::vector<TString> vars;
std::vector<Double_t> coeffs;
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
vars .push_back( GetInputExp(ivar) );
coeffs.push_back( (*fFisherCoeff)[ivar] );
}
vars .push_back( "(offset)" );
coeffs.push_back( fF0 );
TMVA::gTools().FormattedOutput( coeffs, vars, "Variable" , "Coefficient", fLogger );
if (IsNormalised()) {
fLogger << kINFO << "NOTE: You have chosen to use the \"Normalise\" booking option. Hence, the" << Endl;
fLogger << kINFO << " coefficients must be applied to NORMALISED (') variables as follows:" << Endl;
Int_t maxL = 0;
for (Int_t ivar=0; ivar<GetNvar(); ivar++) if (GetInputExp(ivar).Length() > maxL) maxL = GetInputExp(ivar).Length();
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
fLogger << kINFO
<< setw(maxL+9) << TString("[") + GetInputExp(ivar) + "]' = 2*("
<< setw(maxL+2) << TString("[") + GetInputExp(ivar) + "]"
<< setw(3) << (GetXmin(ivar) > 0 ? " - " : " + ")
<< setw(6) << TMath::Abs(GetXmin(ivar)) << setw(3) << ")/"
<< setw(6) << (GetXmax(ivar) - GetXmin(ivar) )
<< setw(3) << " - 1"
<< Endl;
}
fLogger << kINFO << "The TMVA Reader will properly account for this normalisation, but if the" << Endl;
fLogger << kINFO << "Fisher classifier is applied outside the Reader, the transformation must be" << Endl;
fLogger << kINFO << "implemented -- or the \"Normalise\" option is removed and Fisher retrained." << Endl;
fLogger << kINFO << Endl;
}
}
void TMVA::MethodFisher::WriteWeightsToStream( std::ostream& o ) const
{
o << std::setprecision(12) << fF0 << endl;
for (Int_t ivar=0; ivar<GetNvar(); ivar++) o << std::setprecision(12) << (*fFisherCoeff)[ivar] << endl;
}
void TMVA::MethodFisher::ReadWeightsFromStream( istream& istr )
{
istr >> fF0;
for (Int_t ivar=0; ivar<GetNvar(); ivar++) istr >> (*fFisherCoeff)[ivar];
}
void TMVA::MethodFisher::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
fout << " double fFisher0;" << endl;
fout << " std::vector<double> fFisherCoefficients;" << endl;
fout << "};" << endl;
fout << "" << endl;
fout << "inline void " << className << "::Initialize() " << endl;
fout << "{" << endl;
fout << " fFisher0 = " << std::setprecision(12) << fF0 << ";" << endl;
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
fout << " fFisherCoefficients.push_back( " << std::setprecision(12) << (*fFisherCoeff)[ivar] << " );" << endl;
}
fout << endl;
fout << " // sanity check" << endl;
fout << " if (fFisherCoefficients.size() != fNvars) {" << endl;
fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\"::Initialize: mismatch in number of input values\"" << endl;
fout << " << fFisherCoefficients.size() << \" != \" << fNvars << std::endl;" << endl;
fout << " fStatusIsClean = false;" << endl;
fout << " } " << endl;
fout << "}" << endl;
fout << endl;
fout << "inline double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << endl;
fout << "{" << endl;
fout << " double retval = fFisher0;" << endl;
fout << " for (size_t ivar = 0; ivar < fNvars; ivar++) {" << endl;
fout << " retval += fFisherCoefficients[ivar]*inputValues[ivar];" << endl;
fout << " }" << endl;
fout << endl;
fout << " return retval;" << endl;
fout << "}" << endl;
fout << endl;
fout << "// Clean up" << endl;
fout << "inline void " << className << "::Clear() " << endl;
fout << "{" << endl;
fout << " // clear coefficients" << endl;
fout << " fFisherCoefficients.clear(); " << endl;
fout << "}" << endl;
}
void TMVA::MethodFisher::GetHelpMessage() const
{
fLogger << Endl;
fLogger << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
fLogger << Endl;
fLogger << "Fisher discriminants select events by distinguishing the mean " << Endl;
fLogger << "values of the signal and background distributions in a trans- " << Endl;
fLogger << "formed variable space where linear correlations are removed." << Endl;
fLogger << Endl;
fLogger << " (More precisely: the \"linear discriminator\" determines" << Endl;
fLogger << " an axis in the (correlated) hyperspace of the input " << Endl;
fLogger << " variables such that, when projecting the output classes " << Endl;
fLogger << " (signal and background) upon this axis, they are pushed " << Endl;
fLogger << " as far as possible away from each other, while events" << Endl;
fLogger << " of a same class are confined in a close vicinity. The " << Endl;
fLogger << " linearity property of this classifier is reflected in the " << Endl;
fLogger << " metric with which \"far apart\" and \"close vicinity\" are " << Endl;
fLogger << " determined: the covariance matrix of the discriminating" << Endl;
fLogger << " variable space.)" << Endl;
fLogger << Endl;
fLogger << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
fLogger << Endl;
fLogger << "Optimal performance for Fisher discriminants is obtained for " << Endl;
fLogger << "linearly correlated Gaussian-distributed variables. Any deviation" << Endl;
fLogger << "from this ideal reduces the achievable separation power. In " << Endl;
fLogger << "particular, no discrimination at all is achieved for a variable" << Endl;
fLogger << "that has the same sample mean for signal and background, even if " << Endl;
fLogger << "the shapes of the distributions are very different. Thus, Fisher " << Endl;
fLogger << "discriminants often benefit from suitable transformations of the " << Endl;
fLogger << "input variables. For example, if a variable x in [-1,1] has a " << Endl;
fLogger << "a parabolic signal distributions, and a uniform background" << Endl;
fLogger << "distributions, their mean value is zero in both cases, leading " << Endl;
fLogger << "to no separation. The simple transformation x -> |x| renders this " << Endl;
fLogger << "variable powerful for the use in a Fisher discriminant." << Endl;
fLogger << Endl;
fLogger << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
fLogger << Endl;
fLogger << "None" << Endl;
}
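// A minimal usage sketch for booking this classifier through the TMVA
// Factory (hedged: "outputFile", "sigTree" and "bkgTree" are hypothetical
// objects created by the caller, and the Factory interface is assumed to
// be the standard one of this TMVA release):
//
//    TMVA::Factory factory( "TMVAnalysis", outputFile );
//    factory.AddVariable( "var1", 'F' );
//    factory.AddVariable( "var2", 'F' );
//    factory.AddSignalTree    ( sigTree, 1.0 );
//    factory.AddBackgroundTree( bkgTree, 1.0 );
//    factory.BookMethod( TMVA::Types::kFisher, "Fisher", "Method=Fisher" );
//    factory.TrainAllMethods();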