v1.9.4
FIXED: * remap.ReMap.MapString() was not properly correllating groups. It is now.
This commit is contained in:
parent
bae0abe960
commit
07951f1f03
@ -135,7 +135,7 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m
|
|||||||
if len(matchBytes) == 0 || len(matchBytes) == 1 {
|
if len(matchBytes) == 0 || len(matchBytes) == 1 {
|
||||||
/*
|
/*
|
||||||
no submatches whatsoever.
|
no submatches whatsoever.
|
||||||
*technically* I don't think this condition can actually be reached.
|
*Technically* I don't think this condition can actually be reached.
|
||||||
This is more of a safe-return before we re-slice.
|
This is more of a safe-return before we re-slice.
|
||||||
*/
|
*/
|
||||||
matches = make(map[string][][]byte)
|
matches = make(map[string][][]byte)
|
||||||
@ -308,6 +308,13 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
|
|||||||
var grpNm string
|
var grpNm string
|
||||||
var names []string
|
var names []string
|
||||||
var matchStr string
|
var matchStr string
|
||||||
|
/*
|
||||||
|
A slice of indices or index pairs.
|
||||||
|
For each element `e` in idxChunks,
|
||||||
|
* if `e` is nil, no group match.
|
||||||
|
* if len(e) == 1, only a single character was matched.
|
||||||
|
* otherwise len(e) == 2, the start and end of the match.
|
||||||
|
*/
|
||||||
var idxChunks [][]int
|
var idxChunks [][]int
|
||||||
var matchIndices []int
|
var matchIndices []int
|
||||||
var chunkIndices []int // always 2 elements; start pos and end pos
|
var chunkIndices []int // always 2 elements; start pos and end pos
|
||||||
@ -317,7 +324,7 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
|
|||||||
OK so this is a bit of a deviation.
|
OK so this is a bit of a deviation.
|
||||||
|
|
||||||
It's not as straightforward as above, because there isn't an explicit way
|
It's not as straightforward as above, because there isn't an explicit way
|
||||||
like above to determine if a patterb was *matched as an empty string* vs.
|
like above to determine if a pattern was *matched as an empty string* vs.
|
||||||
*not matched*.
|
*not matched*.
|
||||||
|
|
||||||
So instead do roundabout index-y things.
|
So instead do roundabout index-y things.
|
||||||
@ -326,73 +333,111 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
|
|||||||
if s == "" {
|
if s == "" {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
names = r.Regexp.SubexpNames()
|
/*
|
||||||
|
I'm not entirely sure how serious they are about "the slice should not be modified"...
|
||||||
|
|
||||||
|
DO NOT sort or dedupe `names`! If the same name for groups is duplicated,
|
||||||
|
it will be duplicated here in proper order and the ordering is tied to
|
||||||
|
the ordering of matchIndices.
|
||||||
|
*/
|
||||||
|
names = r.Regexp.SubexpNames()[:]
|
||||||
matchIndices = r.Regexp.FindStringSubmatchIndex(s)
|
matchIndices = r.Regexp.FindStringSubmatchIndex(s)
|
||||||
|
|
||||||
if matchIndices == nil {
|
if matchIndices == nil {
|
||||||
// s does not match pattern
|
// s does not match pattern at all.
|
||||||
if !mustMatch {
|
if !mustMatch {
|
||||||
matches = make(map[string][]string)
|
matches = make(map[string][]string)
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if names == nil || len(names) == 0 || len(names) == 1 {
|
if names == nil || len(names) <= 1 {
|
||||||
/*
|
/*
|
||||||
no named capture groups;
|
No named capture groups;
|
||||||
technically only the last condition would be the case.
|
technically only the last condition would be the case,
|
||||||
|
as (regexp.Regexp).SubexpNames() will ALWAYS at the LEAST
|
||||||
|
return a `[]string{""}`.
|
||||||
*/
|
*/
|
||||||
if inclNoMatch {
|
if inclNoMatch {
|
||||||
matches = make(map[string][]string)
|
matches = make(map[string][]string)
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
names = names[1:]
|
|
||||||
|
|
||||||
if len(matchIndices) == 0 || len(matchIndices) == 1 {
|
if len(matchIndices) == 0 || len(matchIndices) == 1 {
|
||||||
/*
|
/*
|
||||||
no submatches whatsoever.
|
No (sub)matches whatsoever.
|
||||||
*technically* I don't think this condition can actually be reached.
|
*technically* I don't think this condition can actually be reached;
|
||||||
|
matchIndices should ALWAYS either be `nil` or len will be at LEAST 2,
|
||||||
|
and modulo 2 thereafter since they're PAIRS of indices...
|
||||||
|
Why they didn't just return a [][]int or [][2]int or something
|
||||||
|
instead of an []int, who knows.
|
||||||
|
But we're correcting that poor design.
|
||||||
This is more of a safe-return before we chunk the indices.
|
This is more of a safe-return before we chunk the indices.
|
||||||
*/
|
*/
|
||||||
matches = make(map[string][]string)
|
matches = make(map[string][]string)
|
||||||
if inclNoMatch {
|
if inclNoMatch {
|
||||||
if len(names) >= 1 {
|
for _, grpNm = range names {
|
||||||
for _, grpNm = range names {
|
if grpNm != "" {
|
||||||
matches[grpNm] = nil
|
matches[grpNm] = nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
The reslice starts at 2 because they're in pairs: []int{<start>, <end>, <start>, <end>, ...}
|
|
||||||
and the first *pair* is the entire pattern match.
|
|
||||||
Thus the len(matchIndices) == 2*len(names).
|
|
||||||
Keep in mind that since the first element of names is removed,
|
|
||||||
the first pair here is also removed.
|
|
||||||
*/
|
|
||||||
matchIndices = matchIndices[2:]
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
A reslice of `matchIndices` could technically start at 2 (as long as `names` is sliced [1:])
|
||||||
|
because they're in pairs: []int{<start>, <end>, <start>, <end>, ...}
|
||||||
|
and the first pair is the entire pattern match (un-resliced names[0]).
|
||||||
|
Thus the len(matchIndices) == 2*len(names), *even* if you
|
||||||
|
Keep in mind that since the first element of names is removed,
|
||||||
|
the first pair here is skipped.
|
||||||
|
This provides a bit more consistent readability, though.
|
||||||
|
*/
|
||||||
idxChunks = make([][]int, len(names))
|
idxChunks = make([][]int, len(names))
|
||||||
for startIdx = 0; startIdx < len(idxChunks); startIdx += 2 {
|
chunkIdx = 0
|
||||||
|
endIdx = 0
|
||||||
|
for startIdx = 0; endIdx < len(matchIndices); startIdx += 2 {
|
||||||
endIdx = startIdx + 2
|
endIdx = startIdx + 2
|
||||||
|
// This technically should never happen.
|
||||||
|
if endIdx > len(matchIndices) {
|
||||||
|
endIdx = len(matchIndices)
|
||||||
|
}
|
||||||
|
|
||||||
|
chunkIndices = matchIndices[startIdx:endIdx]
|
||||||
|
|
||||||
|
if chunkIndices[0] == -1 || chunkIndices[1] == -1 {
|
||||||
|
// group did not match
|
||||||
|
chunkIndices = nil
|
||||||
|
} else {
|
||||||
|
if chunkIndices[0] == chunkIndices[1] {
|
||||||
|
chunkIndices = []int{chunkIndices[0]}
|
||||||
|
} else {
|
||||||
|
chunkIndices = matchIndices[startIdx:endIdx]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
idxChunks[chunkIdx] = chunkIndices
|
||||||
|
chunkIdx++
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now associate with names and pull the string sequence.
|
||||||
|
for chunkIdx, chunkIndices = range idxChunks {
|
||||||
grpNm = names[chunkIdx]
|
grpNm = names[chunkIdx]
|
||||||
/*
|
/*
|
||||||
Thankfully, it's actually a build error if a pattern specifies a named
|
Thankfully, it's actually a build error if a pattern specifies a named
|
||||||
capture group with an empty name.
|
capture group with an empty name.
|
||||||
So we don't need to worry about accounting for that,
|
So we don't need to worry about accounting for that,
|
||||||
and can just skip over grpNm == "" (which is an *unnamed* capture group).
|
and can just skip over grpNm == ""
|
||||||
|
(which is either an *unnamed* capture group
|
||||||
|
OR the first element in `names`, which is always
|
||||||
|
the entire match).
|
||||||
*/
|
*/
|
||||||
if grpNm == "" {
|
if grpNm == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// This technically should never happen.
|
|
||||||
if endIdx > len(matchIndices) {
|
if chunkIndices == nil || len(chunkIndices) == 0 {
|
||||||
endIdx = len(matchIndices)
|
|
||||||
}
|
|
||||||
chunkIndices = matchIndices[startIdx:endIdx]
|
|
||||||
if chunkIndices[0] == -1 || chunkIndices[1] == -1 {
|
|
||||||
// group did not match
|
// group did not match
|
||||||
if !inclNoMatch {
|
if !inclNoMatch {
|
||||||
continue
|
continue
|
||||||
@ -411,13 +456,19 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
matchStr = s[chunkIndices[0]:chunkIndices[1]]
|
switch len(chunkIndices) {
|
||||||
|
case 1:
|
||||||
|
// Single character
|
||||||
|
matchStr = string(s[chunkIndices[0]])
|
||||||
|
case 2:
|
||||||
|
// Multiple characters
|
||||||
|
matchStr = s[chunkIndices[0]:chunkIndices[1]]
|
||||||
|
}
|
||||||
|
|
||||||
if _, ok = tmpMap[grpNm]; !ok {
|
if _, ok = tmpMap[grpNm]; !ok {
|
||||||
tmpMap[grpNm] = make([]string, 0)
|
tmpMap[grpNm] = make([]string, 0)
|
||||||
}
|
}
|
||||||
tmpMap[grpNm] = append(tmpMap[grpNm], matchStr)
|
tmpMap[grpNm] = append(tmpMap[grpNm], matchStr)
|
||||||
|
|
||||||
chunkIdx++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// This *technically* should be completely handled above.
|
// This *technically* should be completely handled above.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user