#include "TString.h"
#include <vector>
#include "TTree.h"
#include "Riostream.h"
#include "TFitter.h"
#include "TMVA/Interval.h"
#include "TMVA/MethodMLP.h"
#include "TMVA/TNeuron.h"
#include "TMVA/TSynapse.h"
#include "TMVA/Timer.h"
#include "TMVA/Types.h"
#include "TMVA/Tools.h"
#include "TMVA/GeneticFitter.h"
#include "TMVA/Config.h"
#ifdef MethodMLP_UseMinuit__
TMVA::MethodMLP* TMVA::MethodMLP::fgThis = 0;
Bool_t MethodMLP_UseMinuit = kTRUE;
#endif
using std::vector;
ClassImp(TMVA::MethodMLP)
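// standard constructor: parse the option string, build the network, set the
// per-synapse learning rates; in batch mode a BatchSize of -1 (or any value
// outside [1, numEvents]) defaults to the full training sample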
TMVA::MethodMLP::MethodMLP( const TString& jobName, const TString& methodTitle, DataSet& theData,
const TString& theOption, TDirectory* theTargetDir )
: MethodANNBase( jobName, methodTitle, theData, theOption, theTargetDir )
{
InitMLP();
SetConfigName( TString("Method") + GetMethodName() );
DeclareOptions();
ParseOptions();
ProcessOptions();
InitializeLearningRates();
if (fBPMode == kBatch) {
Int_t numEvents = Data().GetNEvtTrain();
if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
}
}
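// constructor used when the network is read back from a weight file;
// only the option declarations are needed here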
TMVA::MethodMLP::MethodMLP( DataSet& theData, const TString& theWeightFile, TDirectory* theTargetDir )
: MethodANNBase( theData, theWeightFile, theTargetDir )
{
InitMLP();
DeclareOptions();
}
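// destructor: nothing to be done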
TMVA::MethodMLP::~MethodMLP()
{
}
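// default initialisation: set the method name and type, the test variable
// name, and the signal reference cut applied to the network output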
void TMVA::MethodMLP::InitMLP()
{
SetMethodName( "MLP" );
SetMethodType( Types::kMLP );
SetTestvarName();
SetSignalReferenceCut( 0.5 );
}
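// declare the MLP-specific configuration options: training method (BP or GA),
// learning and decay rates, overtraining-test frequency, back-propagation
// mode (sequential or batch) and the batch size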
void TMVA::MethodMLP::DeclareOptions()
{
DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
"Train with Back-Propagation (BP - default) or Genetic Algorithm (GA - slower and worse)");
AddPreDefVal(TString("BP"));
AddPreDefVal(TString("GA"));
DeclareOptionRef(fLearnRate=0.02, "LearningRate", "ANN learning rate parameter");
DeclareOptionRef(fDecayRate=0.01, "DecayRate", "Decay rate for learning parameter");
DeclareOptionRef(fTestRate =10, "TestRate", "Test for overtraining performed at each #th epoch");
DeclareOptionRef(fBpModeS="sequential", "BPMode",
"Back-propagation learning mode: sequential or batch");
AddPreDefVal(TString("sequential"));
AddPreDefVal(TString("batch"));
DeclareOptionRef(fBatchSize=-1, "BatchSize",
"Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
}
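// translate the option strings into the corresponding enum values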
void TMVA::MethodMLP::ProcessOptions()
{
MethodANNBase::ProcessOptions();
if (fTrainMethodS == "BP") fTrainingMethod = kBP;
else if (fTrainMethodS == "GA") fTrainingMethod = kGA;
if (fBpModeS == "sequential") fBPMode = kSequential;
else if (fBpModeS == "batch") fBPMode = kBatch;
}
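// assign the configured learning rate to every synapse in the network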
void TMVA::MethodMLP::InitializeLearningRates()
{
TSynapse *synapse;
Int_t numSynapses = fSynapses->GetEntriesFast();
for (Int_t i = 0; i < numSynapses; i++) {
synapse = (TSynapse*)fSynapses->At(i);
synapse->SetLearningRate(fLearnRate);
}
}
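// compute the mean-squared-error estimator over the requested (training or
// test) sample: E = 1/(2N) * sum_i (output_i - desired_i)^2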
Double_t TMVA::MethodMLP::CalculateEstimator( Types::ETreeType treeType )
{
Int_t nEvents = ( (treeType == Types::kTesting ) ? Data().GetNEvtTest() :
(treeType == Types::kTraining) ? Data().GetNEvtTrain() : -1 );
if (nEvents <=0)
fLogger << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
Double_t estimator = 0;
for (Int_t i = 0; i < nEvents; i++) {
if (treeType == Types::kTesting )
ReadTestEvent(i);
else
ReadTrainingEvent(i);
Double_t desired = GetDesiredOutput();
ForceNetworkInputs();
ForceNetworkCalculations();
Double_t d = GetOutputNeuron()->GetActivationValue() - desired;
estimator += (d*d);
}
estimator = estimator*0.5/Float_t(nEvents);
return estimator;
}
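// steer the training: back-propagation or genetic-algorithm minimisation
// (or Minuit, if MethodMLP_UseMinuit__ is defined)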
void TMVA::MethodMLP::Train(Int_t nEpochs)
{
PrintMessage("Training Network");
Timer timer( nEpochs, GetName() );
#ifdef MethodMLP_UseMinuit__
if (MethodMLP_UseMinuit) MinuitMinimize();
#else
if (fTrainingMethod == kGA) GeneticMinimize();
else BackPropagationMinimize(nEpochs);
#endif
PrintMessage("Train: elapsed time: " + timer.GetElapsedTime() + " ", kTRUE);
}
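// minimise the estimator with back-propagation: loop over the epochs, train on
// all events, decay the learning rates (switching to the squared decay rate in
// roughly the last 5% of epochs), and fill the training and test estimator
// histograms every fTestRate epochs to monitor overtraining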
void TMVA::MethodMLP::BackPropagationMinimize(Int_t nEpochs)
{
Timer timer( nEpochs, GetName() );
Int_t lateEpoch = (Int_t)(nEpochs*0.95) - 1;
Int_t nbinTest = Int_t(nEpochs/fTestRate);
fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
for (Int_t i = 0; i < nEpochs; i++) {
timer.DrawProgressBar(i);
TrainOneEpoch();
DecaySynapseWeights(i >= lateEpoch);
if ((i+1)%fTestRate == 0) {
Double_t trainE = CalculateEstimator( Types::kTraining );
Double_t testE = CalculateEstimator( Types::kTesting );
fEstimatorHistTrain->Fill( i+1, trainE );
fEstimatorHistTest ->Fill( i+1, testE );
}
}
}
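// train the network on all training events once, in random order; in batch
// mode the accumulated weight changes are applied after every fBatchSize events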
void TMVA::MethodMLP::TrainOneEpoch()
{
Int_t nEvents = Data().GetNEvtTrain();
Int_t* index = new Int_t[nEvents];
for (Int_t i = 0; i < nEvents; i++) index[i] = i;
Shuffle(index, nEvents);
for (Int_t i = 0; i < nEvents; i++) {
TrainOneEvent(index[i]);
if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
AdjustSynapseWeights();
if (fgPRINT_BATCH) {
PrintNetwork();
WaitForKeyboard();
}
}
if (fgPRINT_SEQ) {
PrintNetwork();
WaitForKeyboard();
}
}
delete[] index;
}
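// randomly permute the index array; used to randomise the event order
// for sequential training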
void TMVA::MethodMLP::Shuffle(Int_t* index, Int_t n)
{
Int_t j, k;
Int_t a = n - 1;
for (Int_t i = 0; i < n; i++) {
j = (Int_t) (frgen->Rndm() * a);
k = index[j];
index[j] = index[i];
index[i] = k;
}
}
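// decay the learning rate of every synapse; in the late epochs the squared
// decay rate is used instead of fDecayRate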
void TMVA::MethodMLP::DecaySynapseWeights(Bool_t lateEpoch)
{
TSynapse* synapse;
Int_t numSynapses = fSynapses->GetEntriesFast();
for (Int_t i = 0; i < numSynapses; i++) {
synapse = (TSynapse*)fSynapses->At(i);
if (lateEpoch) synapse->DecayLearningRate(fDecayRate*fDecayRate);
else synapse->DecayLearningRate(fDecayRate);
}
}
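// fast per-event training variant: the input values are taken directly from
// the branch buffer passed in (normalised if requested), and the event weight
// is fixed to 1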
void TMVA::MethodMLP::TrainOneEventFast(Int_t ievt, Float_t*& branchVar, Int_t& type)
{
ReadTrainingEvent(ievt);
Double_t eventWeight = 1.0;
Double_t desired;
if (type == 0) desired = fActivation->GetMin();
else desired = fActivation->GetMax();
Double_t x;
TNeuron* neuron;
for (Int_t j = 0; j < GetNvar(); j++) {
x = branchVar[j];
if (IsNormalised()) x = gTools().NormVariable( x, GetXmin( j ), GetXmax( j ) );
neuron = GetInputNeuron(j);
neuron->ForceValue(x);
}
ForceNetworkCalculations();
UpdateNetwork(desired, eventWeight);
}
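// train the network on a single event: read it, propagate it through the
// network, and update the weights towards the desired output, scaled by the
// event weight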
void TMVA::MethodMLP::TrainOneEvent(Int_t ievt)
{
ReadTrainingEvent(ievt);
Double_t eventWeight = GetEventWeight();
Double_t desired = GetDesiredOutput();
ForceNetworkInputs();
ForceNetworkCalculations();
UpdateNetwork( desired, eventWeight );
}
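// desired network output for the current event: the activation function's
// maximum for signal, its minimum for background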
Double_t TMVA::MethodMLP::GetDesiredOutput()
{
Double_t desired;
if (IsSignalEvent()) desired = fActivation->GetMax();
else desired = fActivation->GetMin();
return desired;
}
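// propagate the (weighted) output error back through the network: set the
// output-neuron error, compute the neuron deltas, and update the synapses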
void TMVA::MethodMLP::UpdateNetwork(Double_t desired, Double_t eventWeight)
{
Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
error *= eventWeight;
GetOutputNeuron()->SetError(error);
CalculateNeuronDeltas();
UpdateSynapses();
}
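// compute the error deltas of all neurons, iterating over the layers from
// output to input (back-propagation)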
void TMVA::MethodMLP::CalculateNeuronDeltas()
{
TNeuron* neuron;
Int_t numNeurons;
TObjArray* curLayer;
Int_t numLayers = fNetwork->GetEntriesFast();
for (Int_t i = numLayers-1; i >= 0; i--) {
curLayer = (TObjArray*)fNetwork->At(i);
numNeurons = curLayer->GetEntriesFast();
for (Int_t j = 0; j < numNeurons; j++) {
neuron = (TNeuron*) curLayer->At(j);
neuron->CalculateDelta();
}
}
}
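// minimise the estimator with a genetic algorithm: each synapse weight is a
// fit parameter constrained to the interval [-3, 3]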
void TMVA::MethodMLP::GeneticMinimize()
{
PrintMessage("Minimizing Estimator with GA");
fGA_preCalc = 1;
fGA_SC_steps = 10;
fGA_SC_rate = 5;
fGA_SC_factor = 0.95;
fGA_nsteps = 30;
vector<Interval*> ranges;
Int_t numWeights = fSynapses->GetEntriesFast();
for (Int_t ivar=0; ivar< numWeights; ivar++) {
ranges.push_back( new Interval( -3.0, 3.0 ) );
}
FitterBase *gf = new GeneticFitter( *this, fLogger.GetPrintedSource(), ranges, GetOptions() );
gf->Run();
Double_t estimator = CalculateEstimator();
fLogger << kINFO << "GA: estimator after optimization: " << estimator << Endl;
}
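// interface used by the fitter: evaluate the estimator for a given parameter set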
Double_t TMVA::MethodMLP::EstimatorFunction( std::vector<Double_t>& parameters)
{
return ComputeEstimator( parameters );
}
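// load the given parameters into the synapse weights and return the resulting estimator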
Double_t TMVA::MethodMLP::ComputeEstimator( std::vector<Double_t>& parameters)
{
TSynapse* synapse;
Int_t numSynapses = fSynapses->GetEntriesFast();
for (Int_t i = 0; i < numSynapses; i++) {
synapse = (TSynapse*)fSynapses->At(i);
synapse->SetWeight(parameters.at(i));
}
Double_t estimator = CalculateEstimator();
return estimator;
}
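// have every neuron update its incoming synapses: immediately in sequential
// mode, or by accumulating the changes in batch mode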
void TMVA::MethodMLP::UpdateSynapses()
{
TNeuron* neuron;
Int_t numNeurons;
TObjArray* curLayer;
Int_t numLayers = fNetwork->GetEntriesFast();
for (Int_t i = 0; i < numLayers; i++) {
curLayer = (TObjArray*)fNetwork->At(i);
numNeurons = curLayer->GetEntriesFast();
for (Int_t j = 0; j < numNeurons; j++) {
neuron = (TNeuron*) curLayer->At(j);
if (fBPMode == kBatch) neuron->UpdateSynapsesBatch();
else neuron->UpdateSynapsesSequential();
}
}
}
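// apply the accumulated weight changes (batch mode), iterating from the
// output layer backwards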
void TMVA::MethodMLP::AdjustSynapseWeights()
{
TNeuron* neuron;
Int_t numNeurons;
TObjArray* curLayer;
Int_t numLayers = fNetwork->GetEntriesFast();
for (Int_t i = numLayers-1; i >= 0; i--) {
curLayer = (TObjArray*)fNetwork->At(i);
numNeurons = curLayer->GetEntriesFast();
for (Int_t j = 0; j < numNeurons; j++) {
neuron = (TNeuron*) curLayer->At(j);
neuron->AdjustSynapseWeights();
}
}
}
#ifdef MethodMLP_UseMinuit__
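// minimise the estimator with Minuit (test code, only compiled if
// MethodMLP_UseMinuit__ is defined): each synapse weight is a MIGRAD parameter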
void TMVA::MethodMLP::MinuitMinimize()
{
fNumberOfWeights = fSynapses->GetEntriesFast();
TFitter* tfitter = new TFitter( fNumberOfWeights );
Double_t args[10];
args[0] = 2;
tfitter->ExecuteCommand( "SET PRINTOUT", args, 1 );
tfitter->ExecuteCommand( "SET NOWARNINGS", args, 0 );
double w[54];
for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
TString parName = Form("w%i", ipar);
tfitter->SetParameter( ipar,
parName, w[ipar], 0.1, 0, 0 );
}
tfitter->SetFCN( &IFCN );
args[0] = 2;
tfitter->ExecuteCommand( "SET STRATEGY", args, 1 );
args[0] = 1e-04;
tfitter->ExecuteCommand( "MIGRAD", args, 1 );
Bool_t doBetter = kFALSE;
Bool_t doEvenBetter = kFALSE;
if (doBetter) {
args[0] = 1e-04;
tfitter->ExecuteCommand( "IMPROVE", args, 1 );
if (doEvenBetter) {
args[0] = 500;
tfitter->ExecuteCommand( "MINOS", args, 1 );
}
}
}
//______________________________________________________________________________
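// static Minuit FCN wrapper: forwards the call to the MethodMLP instance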
void TMVA::MethodMLP::IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
{
((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );
}
static Int_t nc = 0;
static double minf = 1000000;
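// Minuit FCN: copy the fit parameters into the synapse weights, evaluate the
// estimator, and keep track of the number of calls and the best value so far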
void TMVA::MethodMLP::FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
{
for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
TSynapse* synapse = (TSynapse*)fSynapses->At(ipar);
synapse->SetWeight(fitPars[ipar]);
}
f = CalculateEstimator();
nc++;
if (f < minf) minf = f;
for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) fLogger << kVERBOSE << fitPars[ipar] << " ";
fLogger << kVERBOSE << Endl;
fLogger << kVERBOSE << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
}
#endif
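// write MLP-specific code for the standalone C++ response class;
// the actual work is done in the ANN base class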
void TMVA::MethodMLP::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
MethodANNBase::MakeClassSpecific(fout, className);
}
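// print a short description of the MLP method, its performance, and tuning
// hints via the message logger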
void TMVA::MethodMLP::GetHelpMessage() const
{
TString col = gConfig().WriteOptionsReference() ? "" : gTools().Color("bold");
TString colres = gConfig().WriteOptionsReference() ? "" : gTools().Color("reset");
fLogger << Endl;
fLogger << col << "--- Short description:" << colres << Endl;
fLogger << Endl;
fLogger << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
fLogger << "forward multilayer perceptron impementation. The MLP has a user-" << Endl;
fLogger << "defined hidden layer architecture, while the number of input (output)" << Endl;
fLogger << "nodes is determined by the input variables (output classes, i.e., " << Endl;
fLogger << "signal and one background). " << Endl;
fLogger << Endl;
fLogger << col << "--- Performance optimisation:" << colres << Endl;
fLogger << Endl;
fLogger << "Neural networks are stable and performing for a large variety of " << Endl;
fLogger << "linear and non-linear classification problems. However, in contrast" << Endl;
fLogger << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
fLogger << "number of input variables that have only little discrimination power. " << Endl;
fLogger << "" << Endl;
fLogger << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
fLogger << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
fLogger << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
fLogger << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
fLogger << "tests, which is partly due to the slow convergence of its training" << Endl;
fLogger << "(at least 10k training cycles are required to achieve approximately" << Endl;
fLogger << "competitive results)." << Endl;
fLogger << Endl;
fLogger << col << "Overtraining: " << colres
<< "only the TMlpANN performs an explicit separation of the" << Endl;
fLogger << "full training sample into independent training and validation samples." << Endl;
fLogger << "We have found that in most high-energy physics applications the " << Endl;
fLogger << "avaliable degrees of freedom (training events) are sufficient to " << Endl;
fLogger << "constrain the weights of the relatively simple architectures required" << Endl;
fLogger << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
fLogger << "the use of validation samples would only reduce the available training" << Endl;
fLogger << "information. However, if the perrormance on the training sample is " << Endl;
fLogger << "found to be significantly better than the one found with the inde-" << Endl;
fLogger << "pendent test sample, caution is needed. The results for these samples " << Endl;
fLogger << "are printed to standard output at the end of each training job." << Endl;
fLogger << Endl;
fLogger << col << "--- Performance tuning via configuration options:" << colres << Endl;
fLogger << Endl;
fLogger << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
fLogger << "\"HiddenLayers=N+1,N,...\", where here the first hidden layer has N+1" << Endl;
fLogger << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
fLogger << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
fLogger << "in favour of more neurons in the first hidden layer." << Endl;
fLogger << "" << Endl;
fLogger << "The number of cycles should be above 500. As said, if the number of" << Endl;
fLogger << "adjustable weights is small compared to the training sample size," << Endl;
fLogger << "using a large number of training samples should not lead to overtraining." << Endl;
}
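// Illustrative usage sketch (comment only, not part of this class): an MLP
// could be booked through the TMVA Factory with the options declared above;
// the factory, dataset and variable setup are assumed to exist elsewhere, and
// the option values below are examples only.
//
//   factory->BookMethod( TMVA::Types::kMLP, "MLP",
//                        "NCycles=500:HiddenLayers=N+1,N:TrainingMethod=BP:"
//                        "BPMode=sequential:LearningRate=0.02:DecayRate=0.01:TestRate=10" );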