#include "Riostream.h"
#include "TPRegexp.h"
#include "TObjArray.h"
#include "TObjString.h"
#include "TError.h"
#include <pcre.h>
#include <vector>
struct PCREPriv_t {
pcre *fPCRE;
pcre_extra *fPCREExtra;
PCREPriv_t() { fPCRE = 0; fPCREExtra = 0; }
};
ClassImp(TPRegexp)
TPRegexp::TPRegexp()
{
   // Default constructor: clears the option bits and allocates an empty
   // private PCRE holder. The pattern string is left default-constructed.

   fPCREOpts = 0;
   fPriv     = new PCREPriv_t;
}
TPRegexp::TPRegexp(const TString &pat)
{
   // Construct from the regular expression 'pat'.
   // The pattern is only stored here; nothing is compiled in this
   // constructor, and the option bits start out as 0.

   fPriv     = new PCREPriv_t;
   fPCREOpts = 0;
   fPattern  = pat;
}
TPRegexp::TPRegexp(const TPRegexp &p)
{
   // Copy constructor.
   // Copies the pattern string and the option bits of 'p'. The PCRE handles
   // of 'p' are neither shared nor duplicated: the new object gets its own
   // empty private holder.

   fPriv     = new PCREPriv_t;
   fPCREOpts = p.fPCREOpts;
   fPattern  = p.fPattern;
}
TPRegexp::~TPRegexp()
{
   // Destructor: passes the compiled pattern and the study data (each only
   // when set) to pcre_free, then deletes the private holder.

   pcre       *code  = fPriv->fPCRE;
   pcre_extra *extra = fPriv->fPCREExtra;

   if (code)
      pcre_free(code);
   if (extra)
      pcre_free(extra);

   delete fPriv;
}
TPRegexp &TPRegexp::operator=(const TPRegexp &p)
{
   // Assignment operator.
   // Takes over pattern string and option bits of 'p'. Any compiled pattern
   // and study data held by this object are freed and reset to 0; nothing is
   // recompiled here. Self-assignment is a no-op.

   if (this == &p)
      return *this;

   fPattern = p.fPattern;

   if (fPriv->fPCRE) {
      pcre_free(fPriv->fPCRE);
   }
   fPriv->fPCRE = 0;

   if (fPriv->fPCREExtra) {
      pcre_free(fPriv->fPCREExtra);
   }
   fPriv->fPCREExtra = 0;

   fPCREOpts = p.fPCREOpts;

   return *this;
}
UInt_t TPRegexp::ParseMods(const TString &modStr) const
{
   // Translate a string of single-letter modifiers into option bits.
   //
   //   g - kPCRE_GLOBAL        i - PCRE_CASELESS     m - PCRE_MULTILINE
   //   o - kPCRE_OPTIMIZE      s - PCRE_DOTALL       x - PCRE_EXTENDED
   //   d - kPCRE_DEBUG_MSGS
   //
   // An empty modifier string returns the currently stored fPCREOpts.
   // Any other letter is reported through Error() and discards the bits
   // collected so far; parsing then goes on with the next letter.

   if (modStr.Length() <= 0)
      return fPCREOpts;

   UInt_t bits = 0;
   for (const char *c = modStr.Data(); *c; ++c) {
      switch (*c) {
         case 'g': bits |= kPCRE_GLOBAL;     break;
         case 'i': bits |= PCRE_CASELESS;    break;
         case 'm': bits |= PCRE_MULTILINE;   break;
         case 'o': bits |= kPCRE_OPTIMIZE;   break;
         case 's': bits |= PCRE_DOTALL;      break;
         case 'x': bits |= PCRE_EXTENDED;    break;
         case 'd': bits |= kPCRE_DEBUG_MSGS; break;
         default:
            Error("ParseMods", "illegal pattern modifier: %c", *c);
            bits = 0;
            break;
      }
   }
   return bits;
}
TString TPRegexp::GetModifiers() const
{
   // Return the modifier letters corresponding to the bits set in fPCREOpts.
   // Letters are emitted in the fixed order g, i, m, s, x, o, d.

   static const struct {
      unsigned int fBit;     // option bit to test
      char         fLetter;  // modifier letter emitted when the bit is set
   } kLetters[] = {
      { kPCRE_GLOBAL,     'g' },
      { PCRE_CASELESS,    'i' },
      { PCRE_MULTILINE,   'm' },
      { PCRE_DOTALL,      's' },
      { PCRE_EXTENDED,    'x' },
      { kPCRE_OPTIMIZE,   'o' },
      { kPCRE_DEBUG_MSGS, 'd' }
   };
   const int nLetters = sizeof(kLetters) / sizeof(kLetters[0]);

   TString ret;
   for (int k = 0; k < nLetters; ++k) {
      if (fPCREOpts & kLetters[k].fBit)
         ret += kLetters[k].fLetter;
   }
   return ret;
}
void TPRegexp::Compile()
{
   // Compile fPattern with pcre_compile and store the result in fPriv.
   //
   // A previously compiled pattern is freed first. Only the bits of
   // fPCREOpts selected by kPCRE_INTMASK are handed to pcre_compile.
   // When kPCRE_DEBUG_MSGS is set an Info() message is printed.
   //
   // On failure the error is reported through Error(), fPriv->fPCRE is
   // left at 0, any study data belonging to the previous pattern is
   // released and the function returns without calling Optimize().
   // On success Optimize() is called when study data already exists or
   // kPCRE_OPTIMIZE is requested.

   if (fPriv->fPCRE)
      pcre_free(fPriv->fPCRE);

   if (fPCREOpts & kPCRE_DEBUG_MSGS)
      Info("Compile", "PREGEX compiling %s", fPattern.Data());

   const char *errstr = 0;
   Int_t patIndex = 0;
   fPriv->fPCRE = pcre_compile(fPattern.Data(), fPCREOpts & kPCRE_INTMASK,
                               &errstr, &patIndex, 0);

   if (!fPriv->fPCRE) {
      Error("Compile", "compilation of TPRegexp(%s) failed at: %d because %s",
            fPattern.Data(), patIndex, errstr);

      // Study data of the old pattern is meaningless now. Drop it here
      // instead of calling Optimize(), which would study a null handle and
      // emit a second, misleading error message.
      if (fPriv->fPCREExtra) {
         pcre_free(fPriv->fPCREExtra);
         fPriv->fPCREExtra = 0;
      }
      return;
   }

   if (fPriv->fPCREExtra || (fPCREOpts & kPCRE_OPTIMIZE))
      Optimize();
}
void TPRegexp::Optimize()
{
   // (Re)create the study data for the compiled pattern via pcre_study.
   // Existing study data is freed first. When kPCRE_DEBUG_MSGS is set an
   // Info() message is printed. An Error() is issued only when pcre_study
   // returns no data and also sets an error string.

   if (fPriv->fPCREExtra) {
      pcre_free(fPriv->fPCREExtra);
   }

   if (fPCREOpts & kPCRE_DEBUG_MSGS) {
      Info("Optimize", "PREGEX studying %s", fPattern.Data());
   }

   const char *errstr;
   fPriv->fPCREExtra = pcre_study(fPriv->fPCRE, 0, &errstr);

   if (fPriv->fPCREExtra || !errstr)
      return;

   Error("Optimize", "Optimization of TPRegexp(%s) failed: %s",
         fPattern.Data(), errstr);
}
Int_t TPRegexp::ReplaceSubs(const TString &s, TString &final,
                            const TString &replacePattern,
                            Int_t *offVec, Int_t nrMatch) const
{
   // Append 'replacePattern' to 'final', expanding dollar references.
   //
   //   s              - subject string the offsets in offVec refer to
   //   final          - output string; text is appended to it
   //   replacePattern - replacement text, may contain $N and $&
   //   offVec         - offset pairs; pair k is offVec[2*k], offVec[2*k+1]
   //   nrMatch        - number of usable pairs; valid N are 0..nrMatch-1
   //
   // "$N" (N decimal) inserts substring N, "$&" inserts substring 0. A
   // single digit directly following "$&" is skipped. A '$' followed by
   // anything else is reported through Error() and is then still handled
   // as a reference to substring 0. A substring number out of range is
   // reported through Error() and inserts nothing.
   //
   // Returns the number of substrings that were inserted.

   Int_t nrSubs = 0;
   const char *p = replacePattern;

   // state:  0 - copying literal text
   //         1 - collecting the digits of a substring number after '$'
   //        -1 - end of pattern reached
   Int_t state = 0;
   Int_t subnum = 0;
   while (state != -1) {
      switch (state) {
         case 0:
            if (!*p) {
               state = -1;
               break;
            }
            if (*p == '$') {
               state = 1;
               subnum = 0;
               // isdigit() is only defined for values representable as
               // unsigned char (or EOF), hence the casts below.
               if (p[1] == '&') {
                  p++;
                  if (isdigit(static_cast<unsigned char>(p[1])))
                     p++;
               } else if (!isdigit(static_cast<unsigned char>(p[1]))) {
                  Error("ReplaceSubs", "badly formed replacement pattern: %s",
                        replacePattern.Data());
               }
            } else
               final += *p;
            break;
         case 1:
            if (isdigit(static_cast<unsigned char>(*p))) {
               subnum *= 10;
               subnum += (*p)-'0';
            } else {
               if (fPCREOpts & kPCRE_DEBUG_MSGS)
                  Info("ReplaceSubs", "PREGEX appending substr #%d", subnum);
               if (subnum < 0 || subnum > nrMatch-1) {
                  Error("ReplaceSubs","bad string number: %d",subnum);
               } else {
                  const TString subStr = s(offVec[2*subnum],offVec[2*subnum+1]-offVec[2*subnum]);
                  final += subStr;
                  nrSubs++;
               }
               state = 0;
               // Re-examine the current character in state 0 (no p++).
               continue;
            }
      }
      p++;
   }
   return nrSubs;
}
Int_t TPRegexp::MatchInternal(const TString &s, Int_t start,
                              Int_t nMaxMatch, TArrayI *pos)
{
   // Run pcre_exec on 's' beginning at offset 'start', using the already
   // compiled pattern and study data held in fPriv.
   //
   // The offset vector handed to pcre_exec has 3*nMaxMatch entries.
   // PCRE_ERROR_NOMATCH is mapped to a result of 0. Any other result <= 0
   // is reported through Error(), leaves 'pos' untouched and returns 0.
   // Otherwise, when 'pos' is non-null, it is filled with the first
   // 2*result entries of the offset vector.
   //
   // Returns the (non-negative) match count.

   const Int_t vecLen = 3*nMaxMatch;
   Int_t *offVec = new Int_t[vecLen];

   const Int_t rc = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
                              s.Length(), start, 0,
                              offVec, vecLen);

   Int_t nrMatch = 0;
   if (rc > 0) {
      nrMatch = rc;
   } else if (rc != PCRE_ERROR_NOMATCH) {
      Error("Match","pcre_exec error = %d", rc);
      delete [] offVec;
      return 0;
   }

   if (pos)
      pos->Set(2*nrMatch, offVec);

   delete [] offVec;
   return nrMatch;
}
Int_t TPRegexp::Match(const TString &s, const TString &mods, Int_t start,
                      Int_t nMaxMatch, TArrayI *pos)
{
   // Match 's' against the pattern, starting at offset 'start'.
   // 'mods' is parsed with ParseMods(); the pattern is (re)compiled when
   // nothing has been compiled yet or when the resulting option bits
   // differ from the stored ones. The actual matching, including the
   // meaning of 'nMaxMatch', 'pos' and the return value, is delegated to
   // MatchInternal().

   const UInt_t opts = ParseMods(mods);

   if (fPriv->fPCRE && opts == fPCREOpts)
      return MatchInternal(s, start, nMaxMatch, pos);

   fPCREOpts = opts;
   Compile();
   return MatchInternal(s, start, nMaxMatch, pos);
}
TObjArray *TPRegexp::MatchS(const TString &s, const TString &mods,
                            Int_t start, Int_t nMaxMatch)
{
   // Match 's' via Match() and return the matched substrings as a newly
   // allocated TObjArray of TObjString, one entry per reported match.
   // An entry whose start or end offset is negative becomes an empty
   // TObjString. The array is set to own its elements; the array itself
   // is created with 'new' and returned to the caller.

   TArrayI pos;
   const Int_t nrMatch = Match(s, mods, start, nMaxMatch, &pos);

   TObjArray *subStrL = new TObjArray();
   subStrL->SetOwner();

   for (Int_t i = 0; i < nrMatch; ++i) {
      const Int_t from = pos[2*i];
      const Int_t to   = pos[2*i+1];
      if (from < 0 || to < 0) {
         subStrL->Add(new TObjString());
         continue;
      }
      const TString subStr = s(from, to - from);
      subStrL->Add(new TObjString(subStr));
   }

   return subStrL;
}
Int_t TPRegexp::SubstituteInternal(TString &s, const TString &replacePattern,
                                   Int_t start, Int_t nMaxMatch,
                                   Bool_t doDollarSubst)
{
   // Replace matches of the compiled pattern in 's' by 'replacePattern'.
   //
   //   s              - string to modify; overwritten with the result
   //   replacePattern - replacement text
   //   start          - offset at which the first search begins
   //   nMaxMatch      - sizes the offset vector (3*nMaxMatch entries)
   //   doDollarSubst  - if true, expand the replacement with ReplaceSubs();
   //                    otherwise append it verbatim
   //
   // Only the first match is replaced unless kPCRE_GLOBAL is set in
   // fPCREOpts. A pcre_exec result <= 0 other than PCRE_ERROR_NOMATCH is
   // reported through Error() and ends the search.
   //
   // Returns the number of replacements performed.

   const Int_t vecLen = 3*nMaxMatch;
   Int_t *offVec = new Int_t[vecLen];

   TString result;
   Int_t nrSubs     = 0;
   Int_t searchFrom = start;  // offset handed to the next pcre_exec call
   Int_t copiedUpTo = 0;      // part of 's' before this is already handled

   for (;;) {
      const Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
                                      s.Length(), searchFrom, 0,
                                      offVec, vecLen);

      if (nrMatch == PCRE_ERROR_NOMATCH)
         break;
      if (nrMatch <= 0) {
         Error("Substitute", "pcre_exec error = %d", nrMatch);
         break;
      }

      const Int_t matchBeg = offVec[0];
      const Int_t matchEnd = offVec[1];

      // Copy the untouched text in front of the match.
      if (copiedUpTo <= matchBeg) {
         result += s(copiedUpTo, matchBeg - copiedUpTo);
         copiedUpTo = matchEnd;
      }

      if (doDollarSubst)
         ReplaceSubs(s, result, replacePattern, offVec, nrMatch);
      else
         result += replacePattern;
      ++nrSubs;

      if (!(fPCREOpts & kPCRE_GLOBAL))
         break;

      if (matchBeg != matchEnd) {
         searchFrom = matchEnd;
      } else if (matchEnd == s.Length()) {
         // Empty match at the very end: nothing left to search.
         break;
      } else {
         // Empty match: step one character forward to make progress.
         searchFrom = matchEnd + 1;
      }
   }

   delete [] offVec;

   // Append whatever follows the last handled match.
   result += s(copiedUpTo, s.Length() - copiedUpTo);
   s = result;

   return nrSubs;
}
Int_t TPRegexp::Substitute(TString &s, const TString &replacePattern,
                           const TString &mods, Int_t start, Int_t nMaxMatch)
{
   // Substitute matches of the pattern in 's' by 'replacePattern', with
   // dollar references in the replacement expanded.
   // 'mods' is parsed with ParseMods(); the pattern is (re)compiled when
   // nothing has been compiled yet or when the option bits changed.
   // The work itself is done by SubstituteInternal(), whose return value
   // is passed on.

   const UInt_t opts = ParseMods(mods);

   if (fPriv->fPCRE == 0 || fPCREOpts != opts) {
      fPCREOpts = opts;
      Compile();
   }

   const Int_t nrSubs = SubstituteInternal(s, replacePattern, start,
                                           nMaxMatch, kTRUE);
   return nrSubs;
}
Ssiz_t TString::Index(TPRegexp& r, Ssiz_t start) const
{
   // Find the first match of 'r' in this string, searching from 'start'.
   // Returns the start offset of the match, or -1 when Match() reports
   // no match.

   TArrayI pos;
   if (r.Match(*this,"",start,10,&pos) <= 0)
      return -1;
   return pos[0];
}
Ssiz_t TString::Index(TPRegexp& r, Ssiz_t* extent, Ssiz_t start) const
{
   // Find the first match of 'r' in this string, searching from 'start'.
   // On a match, *extent receives the length of the match and the start
   // offset is returned; otherwise *extent is set to 0 and -1 is returned.
   // 'extent' is dereferenced unconditionally and must not be null.

   TArrayI pos;
   if (r.Match(*this,"",start,10,&pos) <= 0) {
      *extent = 0;
      return -1;
   }
   *extent = pos[1]-pos[0];
   return pos[0];
}
TSubString TString::operator()(TPRegexp& r, Ssiz_t start)
{
   // Return the substring matched by 'r', searching from offset 'start'.
   // Position and length come from Index(); without a match these are
   // -1 and 0.

   Ssiz_t extent;
   const Ssiz_t first = Index(r, &extent, start);
   return TSubString(*this, first, extent);
}
TSubString TString::operator()(TPRegexp& r)
{
   // Return the substring matched by 'r', searching from offset 0.

   return operator()(r, 0);
}
TSubString TString::operator()(TPRegexp& r, Ssiz_t start) const
{
   // Const version: return the substring matched by 'r', searching from
   // offset 'start'. Position and length come from Index(); without a
   // match these are -1 and 0.

   Ssiz_t extent;
   const Ssiz_t first = Index(r, &extent, start);
   return TSubString(*this, first, extent);
}
TSubString TString::operator()(TPRegexp& r) const
{
   // Const version: return the substring matched by 'r', searching from
   // offset 0.

   return operator()(r, 0);
}
ClassImp(TPMERegexp);
TPMERegexp::TPMERegexp()
   : TPRegexp(),
     fNMaxMatches(10),
     fNMatches(0),
     fAddressOfLastString(0),
     fLastGlobalPosition(0)
{
   // Default constructor: match limit of 10, no match state, and the
   // (default-constructed) pattern is compiled right away.

   Compile();
}
TPMERegexp::TPMERegexp(const TString& s, const TString& opts, Int_t nMatchMax)
   : TPRegexp(s),
     fNMaxMatches(nMatchMax),
     fNMatches(0),
     fAddressOfLastString(0),
     fLastGlobalPosition(0)
{
   // Construct from pattern 's' with modifiers given as a string.
   //   s         - regular expression
   //   opts      - modifier letters, translated by ParseMods()
   //   nMatchMax - stored as the match limit fNMaxMatches
   // The pattern is compiled immediately.

   const UInt_t bits = ParseMods(opts);
   fPCREOpts = bits;
   Compile();
}
TPMERegexp::TPMERegexp(const TString& s, UInt_t opts, Int_t nMatchMax)
   : TPRegexp(s),
     fNMaxMatches(nMatchMax),
     fNMatches(0),
     fAddressOfLastString(0),
     fLastGlobalPosition(0)
{
   // Construct from pattern 's' with modifiers given as option bits.
   //   s         - regular expression
   //   opts      - option bits, stored unchanged in fPCREOpts
   //   nMatchMax - stored as the match limit fNMaxMatches
   // The pattern is compiled immediately.

   fPCREOpts = opts;
   Compile();
}
TPMERegexp::TPMERegexp(const TPMERegexp& r)
   : TPRegexp(r),
     fNMaxMatches(r.fNMaxMatches),
     fNMatches(0),
     fAddressOfLastString(0),
     fLastGlobalPosition(0)
{
   // Copy constructor.
   // Copies the base part and the match limit of 'r'. The match state
   // (match count, last string address, global position) is not copied
   // but starts out cleared. The pattern is compiled immediately.

   Compile();
}
void TPMERegexp::Reset(const TString& s, const TString& opts, Int_t nMatchMax)
{
Reset(s, ParseMods(opts), nMatchMax);
}
void TPMERegexp::Reset(const TString& s, UInt_t opts, Int_t nMatchMax)
{
fPattern = s;
fPCREOpts = opts;
Compile();
if (nMatchMax != -1)
fNMatches = nMatchMax;
fNMatches = 0;
fLastGlobalPosition = 0;
}
void TPMERegexp::AssignGlobalState(const TPMERegexp& re)
{
   // Take over the global-match state of 're': the position reached by
   // its last global match and its copy of the last matched string.
   // NOTE(review): fAddressOfLastString is not taken over, and Match()
   // clears fLastGlobalPosition when that address differs from the string
   // being matched - confirm this is intended.

   fLastGlobalPosition = re.fLastGlobalPosition;
   fLastStringMatched  = re.fLastStringMatched;
}
void TPMERegexp::ResetGlobalState()
{
   // Forget the position reached by the last global match, so that the
   // offset Match() adds to 'start' in global mode is 0 again.

   fLastGlobalPosition = 0;
}
Int_t TPMERegexp::Match(const TString& s, UInt_t start)
{
   // Match 's' against the compiled pattern and remember the result.
   //
   // The match markers go to fMarkers, a copy of 's' to
   // fLastStringMatched and the address of 's' to fAddressOfLastString.
   // When the address of 's' differs from that of the previous call, the
   // global position is cleared first. With kPCRE_GLOBAL set, the search
   // begins at 'start' plus the global position, and afterwards the global
   // position becomes the end of the match (or 0 when nothing matched).
   //
   // Returns the match count delivered by MatchInternal().

   const void *addr = (void*) &s;
   if (fAddressOfLastString != addr)
      fLastGlobalPosition = 0;

   const UInt_t global = fPCREOpts & kPCRE_GLOBAL;
   if (global)
      start += fLastGlobalPosition;

   fNMatches = MatchInternal(s, start, fNMaxMatches, &fMarkers);

   fLastStringMatched   = s;
   fAddressOfLastString = (void*) &s;

   if (global) {
      if (fNMatches > 0)
         fLastGlobalPosition = fMarkers[1];
      else
         fLastGlobalPosition = 0;
   }

   return fNMatches;
}
Int_t TPMERegexp::Split(const TString& s, Int_t maxfields)
{
   // Split 's' into fields, using matches of the pattern as separators.
   //
   // The resulting field boundaries are stored in fMarkers (begin/end
   // pairs) and their count in fNMatches, which is also the return value.
   //
   //   maxfields  > 0 : at most that many separator matches are processed;
   //                    the last stored field is extended to the end of 's'
   //   maxfields == 0 : empty fields are held back and only stored when a
   //                    later non-empty field follows them
   //   maxfields  < 0 : empty fields are stored immediately, and a final
   //                    field is stored even when it is empty
   //
   // Capture groups of each separator match are stored as extra fields.
   // A zero-length separator match yields a one-character field.
   // Without any match the whole string is the only field.
   //
   // NOTE(review): with kPCRE_GLOBAL set, Match() adds fLastGlobalPosition
   // to the offset passed below - confirm Split() is meant to be used
   // without the 'g' modifier.

   typedef std::pair<int, int> Marker_t;
   typedef std::vector<Marker_t> MarkerVec_t;

   // Field markers collected so far, and empty fields that are held back
   // (only used when maxfields == 0).
   MarkerVec_t oMarks;
   MarkerVec_t oCurrentTrailingEmpties;

   Int_t nOffset = 0;        // start of the field currently being built
   Int_t nMatchesFound = 0;  // number of separator matches processed
   Int_t matchRes;           // result of the latest Match() call

   while ((matchRes = Match(s, nOffset)) &&
          ((maxfields < 1) || nMatchesFound < maxfields)) {
      ++nMatchesFound;

      // Zero-length separator: emit a one-character field and move on by
      // one character.
      if (fMarkers[1] - fMarkers[0] == 0) {
         oMarks.push_back(Marker_t(nOffset, nOffset + 1));
         ++nOffset;
         if (nOffset >= s.Length())
            break;
         else
            continue;
      }

      if (nOffset != fMarkers[0]) {
         // Non-empty field in front of the separator: first release the
         // empty fields that were held back.
         if (!oCurrentTrailingEmpties.empty()) {
            oMarks.insert(oMarks.end(),
                          oCurrentTrailingEmpties.begin(),
                          oCurrentTrailingEmpties.end());
            oCurrentTrailingEmpties.clear();
         }
         oMarks.push_back(Marker_t(nOffset, fMarkers[0]));
      } else {
         // Empty field in front of the separator.
         if (maxfields == 0) {
            oCurrentTrailingEmpties.push_back(Marker_t(nOffset, nOffset));
         } else {
            oMarks.push_back(Marker_t(nOffset, nOffset));
         }
      }

      // Continue behind the separator.
      nOffset = fMarkers[1];

      // Capture groups of the separator become fields of their own.
      if (matchRes > 1) {
         for (Int_t i = 1; i < matchRes; ++i)
            oMarks.push_back(Marker_t(fMarkers[2*i], fMarkers[2*i + 1]));
      }
   }

   if (nMatchesFound == 0) {
      // No separator at all: the whole string is one field.
      oMarks.push_back(Marker_t(0, s.Length()));
   }
   else if (maxfields > 0 && nMatchesFound >= maxfields) {
      // Field limit reached: the last field takes the rest of the string.
      oMarks[oMarks.size() - 1].second = s.Length();
   }
   else {
      // Store the remainder behind the last separator, unless it is empty
      // and maxfields is not negative.
      Bool_t last_empty = (nOffset == s.Length());
      if (!last_empty || maxfields < 0) {
         if (!oCurrentTrailingEmpties.empty()) {
            oMarks.insert(oMarks.end(),
                          oCurrentTrailingEmpties.begin(),
                          oCurrentTrailingEmpties.end());
         }
         oMarks.push_back(Marker_t(nOffset, s.Length()));
      }
   }

   // Publish the collected fields through fNMatches / fMarkers.
   fNMatches = oMarks.size();
   fMarkers.Set(2*fNMatches);
   for (Int_t i = 0; i < fNMatches; ++i) {
      fMarkers[2*i] = oMarks[i].first;
      fMarkers[2*i + 1] = oMarks[i].second;
   }

   return fNMatches;
}
Int_t TPMERegexp::Substitute(TString& s, const TString& r, Bool_t doDollarSubst)
{
   // Substitute matches of the pattern in 's' by 'r', then process
   // case-conversion escapes in the result.
   //
   //   s             - string to modify; overwritten with the result
   //   r             - replacement text
   //   doDollarSubst - passed on to SubstituteInternal()
   //
   // After SubstituteInternal() has run, the complete resulting string is
   // scanned for these escapes:
   //   \l  lower-case the next character     \u  upper-case the next one
   //   \L  lower-case until \E               \U  upper-case until \E
   //   \E  end \L / \U
   // A backslash at the very end is kept; a backslash followed by any
   // other character is copied together with that character.
   //
   // Returns the number of substitutions made by SubstituteInternal().

   Int_t cnt = SubstituteInternal(s, r, 0, fNMaxMatches, doDollarSubst);

   TString ret;
   Int_t state = 0;   // 0: copy, 1: \l, 2: \u, 3: \L, 4: \U
   Ssiz_t pos = 0, len = s.Length();
   const Char_t *data = s.Data();
   while (pos < len) {
      Char_t c = data[pos];
      if (c == '\\') {
         c = data[pos+1];   // at pos == len-1 this reads the terminating 0
         switch (c) {
            case  0 : ret += '\\'; break;
            case 'l': state = 1;   break;
            case 'u': state = 2;   break;
            case 'L': state = 3;   break;
            case 'U': state = 4;   break;
            case 'E': state = 0;   break;
            default : ret += '\\'; ret += c; break;
         }
         pos += 2;
      } else {
         // tolower()/toupper() are only defined for values representable
         // as unsigned char (or EOF); a negative Char_t must not be passed.
         const unsigned char uc = static_cast<unsigned char>(c);
         switch (state) {
            case 0:  ret += c; break;
            case 1:  ret += (Char_t) tolower(uc); state = 0; break;
            case 2:  ret += (Char_t) toupper(uc); state = 0; break;
            case 3:  ret += (Char_t) tolower(uc); break;
            case 4:  ret += (Char_t) toupper(uc); break;
            default: Error("TPMERegexp::Substitute", "invalid state.");
         }
         ++pos;
      }
   }

   s = ret;

   return cnt;
}
TString TPMERegexp::operator[](int index)
{
   // Return the 'index'-th marked substring of the last string handled by
   // Match() (fLastStringMatched), as described by fMarkers.
   // An index outside 0..fNMatches-1 yields an empty string.

   // Negative indices are rejected as well; they would otherwise be used
   // to form a negative index into fMarkers.
   if (index < 0 || index >= fNMatches)
      return "";

   Int_t begin = fMarkers[2*index];
   Int_t end   = fMarkers[2*index + 1];
   return fLastStringMatched(begin, end-begin);
}
void TPMERegexp::Print(Option_t* option)
{
   // Print the pattern and its modifier letters. When 'option' contains
   // "all" (case-insensitive), also print the last matched string, the
   // number of matches and every marked substring.

   TString opt = option;
   opt.ToLower();

   Printf("Regexp='%s', Opts='%s'", fPattern.Data(), GetModifiers().Data());

   if (!opt.Contains("all"))
      return;

   Printf(" last string='%s'", fLastStringMatched.Data());
   Printf(" number of matches = %d", fNMatches);
   Int_t i = 0;
   while (i < fNMatches) {
      Printf(" %d - %s", i, operator[](i).Data());
      ++i;
   }
}
ClassImp(TStringToken)
TStringToken::TStringToken(const TString& fullStr, const TString& splitRe, Bool_t retVoid)
   : fFullStr(fullStr),
     fSplitRe(splitRe),
     fReturnVoid(retVoid),
     fPos(0)
{
   // Set up tokenization of 'fullStr'.
   //   fullStr - string to be split into tokens
   //   splitRe - regular expression matching the separators
   //   retVoid - if true, NextToken() also reports empty tokens
   // Scanning starts at position 0; no token is extracted here.
}
Bool_t TStringToken::NextToken()
{
   // Advance to the next token and store it as the value of this string.
   //
   // Starting at fPos, the text up to the next separator match (or up to
   // the end of the full string when there is no further match) becomes
   // the token. Empty tokens are skipped unless fReturnVoid is set. With
   // fReturnVoid set, a final empty token is reported when the previous
   // separator ended exactly at the end of the full string.
   //
   // Returns kTRUE when a token was stored, kFALSE when none is left.
   //
   // NOTE(review): if the separator matches an empty string at fPos while
   // fReturnVoid is false, fPos does not advance and the loop below does
   // not appear to terminate - confirm such patterns are not used.

   TArrayI x;
   while (fPos < fFullStr.Length()) {
      const Int_t found = fSplitRe.Match(fFullStr, "", fPos, 2, &x);
      if (!found) {
         // No further separator: the remainder is the token.
         TString::operator=(fFullStr(fPos, fFullStr.Length() - fPos));
         fPos = fFullStr.Length() + 1;
      } else {
         TString::operator=(fFullStr(fPos, x[0] - fPos));
         fPos = x[1];
      }

      if (fReturnVoid || Length())
         return kTRUE;
   }

   // The previous separator ended exactly at the end of the string.
   if (fReturnVoid && fPos == fFullStr.Length()) {
      TString::operator=("");
      fPos = fFullStr.Length() + 1;
      return kTRUE;
   }

   return kFALSE;
}
// Last change: Tue Dec 9 09:01:12 2008
// Last generated: 2008-12-09 09:01
// This page has been automatically generated. If you have any comments or suggestions about the page layout, send a mail to ROOT support, or contact the developers with any questions or problems regarding ROOT.