#include "TMVA/MethodHMatrix.h"
#include "TMVA/Tools.h"
#include "TMatrix.h"
#include "Riostream.h"
#include <algorithm>
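// H-Matrix method: a simple comparison of chi-squared estimators for signal
// and background, taking into account the linear correlations between the
// input variables via the inverse covariance ("H") matrices and the mean
// vectors of the two classes.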
ClassImp(TMVA::MethodHMatrix)
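// standard constructor: interprets the booking options and sets up the method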
TMVA::MethodHMatrix::MethodHMatrix( const TString& jobName, const TString& methodTitle, DataSet& theData,
const TString& theOption, TDirectory* theTargetDir )
: TMVA::MethodBase( jobName, methodTitle, theData, theOption, theTargetDir )
{
InitHMatrix();
SetConfigName( TString("Method") + GetMethodName() );
DeclareOptions();
ParseOptions();
ProcessOptions();
}
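// constructor used when reading the classifier back from a weight file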
TMVA::MethodHMatrix::MethodHMatrix( DataSet & theData,
const TString& theWeightFile,
TDirectory* theTargetDir )
: TMVA::MethodBase( theData, theWeightFile, theTargetDir )
{
InitHMatrix();
}
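// default initialisation: allocate the inverse H-matrices and mean-value
// vectors for signal and background, and set the signal reference cut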
void TMVA::MethodHMatrix::InitHMatrix( void )
{
SetMethodName( "HMatrix" );
SetMethodType( TMVA::Types::kHMatrix );
SetTestvarName();
SetNormalised( kTRUE );
fInvHMatrixS = new TMatrixD( GetNvar(), GetNvar() );
fInvHMatrixB = new TMatrixD( GetNvar(), GetNvar() );
fVecMeanS = new TVectorD( GetNvar() );
fVecMeanB = new TVectorD( GetNvar() );
SetSignalReferenceCut( 0.0 );
}
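// destructor: delete the matrices and vectors owned by this method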
TMVA::MethodHMatrix::~MethodHMatrix( void )
{
if (NULL != fInvHMatrixS) delete fInvHMatrixS;
if (NULL != fInvHMatrixB) delete fInvHMatrixB;
if (NULL != fVecMeanS ) delete fVecMeanS;
if (NULL != fVecMeanB ) delete fVecMeanB;
}
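// the H-Matrix method has no method-specific configuration options to declare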
void TMVA::MethodHMatrix::DeclareOptions()
{
}
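// process the options specified by the user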
void TMVA::MethodHMatrix::ProcessOptions()
{
MethodBase::ProcessOptions();
CheckForUnusedOptions();
}
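// training: compute the covariance matrices of signal and background, check
// them for (near-)singularity, and invert them in place to obtain the H-matrices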
void TMVA::MethodHMatrix::Train( void )
{
   if (!CheckSanity()) fLogger << kFATAL << "<Train> sanity check failed" << Endl;

   // compute the covariance matrices of the signal and background training samples
   ComputeCovariance( kTRUE,  fInvHMatrixS );
   ComputeCovariance( kFALSE, fInvHMatrixB );

   // sanity checks: warn if a covariance matrix is nearly singular, abort if it is singular
   const Double_t detS = TMath::Abs(fInvHMatrixS->Determinant());
   const Double_t detB = TMath::Abs(fInvHMatrixB->Determinant());
   if (detS < 10E-24) {
      fLogger << kWARNING << "<Train> H-matrix S is almost singular with determinant = " << detS
              << "; did you use variables that are linear combinations or highly correlated?" << Endl;
   }
   if (detB < 10E-24) {
      fLogger << kWARNING << "<Train> H-matrix B is almost singular with determinant = " << detB
              << "; did you use variables that are linear combinations or highly correlated?" << Endl;
   }
   if (detS < 10E-120) {
      fLogger << kFATAL << "<Train> H-matrix S is singular with determinant = " << detS
              << "; did you use variables that are linear combinations?" << Endl;
   }
   if (detB < 10E-120) {
      fLogger << kFATAL << "<Train> H-matrix B is singular with determinant = " << detB
              << "; did you use variables that are linear combinations?" << Endl;
   }

   // invert the covariance matrices in place: these are the H-matrices
   fInvHMatrixS->Invert();
   fInvHMatrixB->Invert();
}
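// compute the weighted covariance matrix of the training events of the given
// class (signal if isSignal is true, background otherwise) and fill the
// corresponding vector of mean values:
//   cov_{ij} = <x_i x_j> - <x_i><x_j>,  with weighted event averages <...>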
void TMVA::MethodHMatrix::ComputeCovariance( Bool_t isSignal, TMatrixD* mat )
{
const UInt_t nvar = GetNvar();
UInt_t ivar, jvar;
TVectorD vec(nvar); vec *= 0;
TMatrixD mat2(nvar, nvar); mat2 *= 0;
Double_t sumOfWeights = 0;
Double_t *xval = new Double_t[nvar];
for (Int_t i=0; i<Data().GetNEvtTrain(); i++) {
ReadTrainingEvent(i);
if (IsSignalEvent() != isSignal) continue;
Double_t weight = GetEventWeight();
sumOfWeights += weight;
for (ivar=0; ivar<nvar; ivar++) xval[ivar] = GetEventVal(ivar);
for (ivar=0; ivar<nvar; ivar++) {
vec(ivar) += xval[ivar]*weight;
mat2(ivar, ivar) += (xval[ivar]*xval[ivar])*weight;
for (jvar=ivar+1; jvar<nvar; jvar++) {
mat2(ivar, jvar) += (xval[ivar]*xval[jvar])*weight;
mat2(jvar, ivar) = mat2(ivar, jvar);
}
}
}
for (ivar=0; ivar<nvar; ivar++) {
if (isSignal) (*fVecMeanS)(ivar) = vec(ivar)/sumOfWeights;
else (*fVecMeanB)(ivar) = vec(ivar)/sumOfWeights;
for (jvar=0; jvar<nvar; jvar++) {
(*mat)(ivar, jvar) = mat2(ivar, jvar)/sumOfWeights - vec(ivar)*vec(jvar)/(sumOfWeights*sumOfWeights);
}
}
delete [] xval;
}
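// returns the H-matrix signal estimator, built from the two chi2 estimators:
//   chi2_U = sum_{i,j} (x_i - <x_i>_U) * H^U_{ij} * (x_j - <x_j>_U),  U = S, B
//   MVA    = (chi2_B - chi2_S) / (chi2_S + chi2_B)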
Double_t TMVA::MethodHMatrix::GetMvaValue()
{
Double_t s = GetChi2( Types::kSignal );
Double_t b = GetChi2( Types::kBackground );
   if (s+b <= 0) fLogger << kFATAL << "<GetMvaValue> non-positive sum of chi2 estimators: s+b = " << s+b << Endl;
return (b - s)/(s + b);
}
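// compute the chi2 estimator of the given event with respect to the signal or
// background hypothesis selected by "type"; the variables are normalised first
// if the method was booked with normalisation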
Double_t TMVA::MethodHMatrix::GetChi2( TMVA::Event* e, Types::ESBType type ) const
{
Int_t ivar,jvar;
   std::vector<Double_t> val( GetNvar() );
for (ivar=0; ivar<GetNvar(); ivar++) {
val[ivar] = e->GetVal(ivar);
if (IsNormalised()) val[ivar] = gTools().NormVariable( val[ivar], GetXmin( ivar ), GetXmax( ivar ) );
}
Double_t chi2 = 0;
for (ivar=0; ivar<GetNvar(); ivar++) {
for (jvar=0; jvar<GetNvar(); jvar++) {
if (type == Types::kSignal)
chi2 += ( (val[ivar] - (*fVecMeanS)(ivar))*(val[jvar] - (*fVecMeanS)(jvar))
* (*fInvHMatrixS)(ivar,jvar) );
else
chi2 += ( (val[ivar] - (*fVecMeanB)(ivar))*(val[jvar] - (*fVecMeanB)(jvar))
* (*fInvHMatrixB)(ivar,jvar) );
}
}
if (chi2 < 0) fLogger << kFATAL << "<GetChi2> negative chi2: " << chi2 << Endl;
return chi2;
}
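// compute the chi2 estimator of the current event with respect to the signal
// or background hypothesis selected by "type"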
Double_t TMVA::MethodHMatrix::GetChi2( Types::ESBType type ) const
{
Int_t ivar,jvar;
   std::vector<Double_t> val( GetNvar() );
for (ivar=0; ivar<GetNvar(); ivar++) val[ivar] = GetEventVal( ivar );
Double_t chi2 = 0;
for (ivar=0; ivar<GetNvar(); ivar++) {
for (jvar=0; jvar<GetNvar(); jvar++) {
if (type == Types::kSignal)
chi2 += ( (val[ivar] - (*fVecMeanS)(ivar))*(val[jvar] - (*fVecMeanS)(jvar))
* (*fInvHMatrixS)(ivar,jvar) );
else
chi2 += ( (val[ivar] - (*fVecMeanB)(ivar))*(val[jvar] - (*fVecMeanB)(jvar))
* (*fInvHMatrixB)(ivar,jvar) );
}
}
if (chi2 < 0) fLogger << kFATAL << "<GetChi2> negative chi2: " << chi2 << Endl;
return chi2;
}
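// write the mean-value vectors and the inverted H-matrices to the weight stream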
void TMVA::MethodHMatrix::WriteWeightsToStream( ostream& o ) const
{
Int_t ivar,jvar;
   o << this->GetMethodName() << endl;
for (ivar=0; ivar<GetNvar(); ivar++) {
o << (*fVecMeanS)(ivar) << " " << (*fVecMeanB)(ivar) << endl;
}
for (ivar=0; ivar<GetNvar(); ivar++) {
for (jvar=0; jvar<GetNvar(); jvar++) {
o << (*fInvHMatrixS)(ivar,jvar) << " ";
}
o << endl;
}
for (ivar=0; ivar<GetNvar(); ivar++) {
for (jvar=0; jvar<GetNvar(); jvar++) {
o << (*fInvHMatrixB)(ivar,jvar) << " ";
}
o << endl;
}
}
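// read the mean-value vectors and the inverted H-matrices back from the weight stream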
void TMVA::MethodHMatrix::ReadWeightsFromStream( istream& istr )
{
Int_t ivar,jvar;
   TString dummy;
istr >> dummy;
this->SetMethodName(dummy);
for (ivar=0; ivar<GetNvar(); ivar++)
istr >> (*fVecMeanS)(ivar) >> (*fVecMeanB)(ivar);
for (ivar=0; ivar<GetNvar(); ivar++)
for (jvar=0; jvar<GetNvar(); jvar++)
istr >> (*fInvHMatrixS)(ivar,jvar);
for (ivar=0; ivar<GetNvar(); ivar++)
for (jvar=0; jvar<GetNvar(); jvar++)
istr >> (*fInvHMatrixB)(ivar,jvar);
}
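// write the H-Matrix-specific member declarations and function implementations
// of the standalone C++ response class to the output stream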
void TMVA::MethodHMatrix::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
fout << " // arrays of input evt vs. variable " << endl;
fout << " double fInvHMatrixS[" << GetNvar() << "][" << GetNvar() << "]; // inverse H-matrix (signal)" << endl;
fout << " double fInvHMatrixB[" << GetNvar() << "][" << GetNvar() << "]; // inverse H-matrix (background)" << endl;
fout << " double fVecMeanS[" << GetNvar() << "]; // vector of mean values (signal)" << endl;
fout << " double fVecMeanB[" << GetNvar() << "]; // vector of mean values (background)" << endl;
fout << " " << endl;
fout << " double GetChi2( const std::vector<double>& inputValues, int type ) const;" << endl;
fout << "};" << endl;
fout << " " << endl;
fout << "void " << className << "::Initialize() " << endl;
fout << "{" << endl;
fout << " // init vectors with mean values" << endl;
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
fout << " fVecMeanS[" << ivar << "] = " << (*fVecMeanS)(ivar) << ";" << endl;
fout << " fVecMeanB[" << ivar << "] = " << (*fVecMeanB)(ivar) << ";" << endl;
}
fout << " " << endl;
fout << " // init H-matrices" << endl;
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
for (Int_t jvar=0; jvar<GetNvar(); jvar++) {
fout << " fInvHMatrixS[" << ivar << "][" << jvar << "] = "
<< (*fInvHMatrixS)(ivar,jvar) << ";" << endl;
fout << " fInvHMatrixB[" << ivar << "][" << jvar << "] = "
<< (*fInvHMatrixB)(ivar,jvar) << ";" << endl;
}
}
fout << "}" << endl;
fout << " " << endl;
fout << "inline double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << endl;
fout << "{" << endl;
fout << " // returns the H-matrix signal estimator" << endl;
fout << " double s = GetChi2( inputValues, " << Types::kSignal << " );" << endl;
fout << " double b = GetChi2( inputValues, " << Types::kBackground << " );" << endl;
fout << " " << endl;
fout << " if (s+b <= 0) std::cout << \"Problem in class " << className << "::GetMvaValue__: s+b = \"" << endl;
fout << " << s+b << \" <= 0 \" << std::endl;" << endl;
fout << " " << endl;
fout << " return (b - s)/(s + b);" << endl;
fout << "}" << endl;
fout << " " << endl;
fout << "inline double " << className << "::GetChi2( const std::vector<double>& inputValues, int type ) const" << endl;
fout << "{" << endl;
fout << " // compute chi2-estimator for event according to type (signal/background)" << endl;
fout << " " << endl;
fout << " size_t ivar,jvar;" << endl;
fout << " double chi2 = 0;" << endl;
fout << " for (ivar=0; ivar<GetNvar(); ivar++) {" << endl;
fout << " for (jvar=0; jvar<GetNvar(); jvar++) {" << endl;
fout << " if (type == " << Types::kSignal << ") " << endl;
fout << " chi2 += ( (inputValues[ivar] - fVecMeanS[ivar])*(inputValues[jvar] - fVecMeanS[jvar])" << endl;
fout << " * fInvHMatrixS[ivar][jvar] );" << endl;
fout << " else" << endl;
fout << " chi2 += ( (inputValues[ivar] - fVecMeanB[ivar])*(inputValues[jvar] - fVecMeanB[jvar])" << endl;
fout << " * fInvHMatrixB[ivar][jvar] );" << endl;
fout << " }" << endl;
fout << " } // loop over variables " << endl;
fout << " " << endl;
fout << " // sanity check" << endl;
fout << " if (chi2 < 0) std::cout << \"Problem in class " << className << "::GetChi2: chi2 = \"" << endl;
fout << " << chi2 << \" < 0 \" << std::endl;" << endl;
fout << " " << endl;
fout << " return chi2;" << endl;
fout << "}" << endl;
fout << " " << endl;
fout << "// Clean up" << endl;
fout << "inline void " << className << "::Clear() " << endl;
fout << "{" << endl;
fout << " // nothing to clear" << endl;
fout << "}" << endl;
}
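// print a help message describing the H-Matrix classifier and its performance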
void TMVA::MethodHMatrix::GetHelpMessage() const
{
fLogger << Endl;
fLogger << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
fLogger << Endl;
fLogger << "The H-Matrix classifier discriminates one class (signal) of a feature" << Endl;
fLogger << "vector from another (background). The correlated elements of the" << Endl;
fLogger << "vector are assumed to be Gaussian distributed, and the inverse of" << Endl;
fLogger << "the covariance matrix is the H-Matrix. A multivariate chi-squared" << Endl;
fLogger << "estimator is built that exploits differences in the mean values of" << Endl;
fLogger << "the vector elements between the two classes for the purpose of" << Endl;
fLogger << "discrimination." << Endl;
fLogger << Endl;
fLogger << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
fLogger << Endl;
fLogger << "The TMVA implementation of the H-Matrix classifier has been shown" << Endl;
fLogger << "to underperform in comparison with the corresponding Fisher discriminant," << Endl;
fLogger << "when using similar assumptions and complexity. Its use is therefore" << Endl;
fLogger << "depreciated. Only in cases where the background model is strongly" << Endl;
fLogger << "non-Gaussian, H-Matrix may perform better than Fisher. In such" << Endl;
fLogger << "occurrences the user is advised to employ non-linear classifiers. " << Endl;
fLogger << Endl;
fLogger << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
fLogger << Endl;
fLogger << "None" << Endl;
}
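// Usage sketch (illustration only, not part of the original implementation):
// a minimal way to book this classifier through the TMVA Factory. The file,
// tree and variable names below are placeholders, and the option strings are
// just examples.
//
//    TFile* outputFile = TFile::Open( "TMVA.root", "RECREATE" );
//    TMVA::Factory* factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V" );
//    factory->AddVariable( "var1", 'F' );
//    factory->AddVariable( "var2", 'F' );
//    factory->AddSignalTree    ( sigTree, 1.0 );   // sigTree: placeholder TTree*
//    factory->AddBackgroundTree( bkgTree, 1.0 );   // bkgTree: placeholder TTree*
//    factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" );
//    factory->TrainAllMethods();
//    factory->TestAllMethods();
//    factory->EvaluateAllMethods();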