FIX:
* Yeah so the ReMap.Map* stuff was kind of broken hard. It's fixed now.
This commit is contained in:
brent saner 2025-08-04 04:26:52 +00:00
parent 154170c0e5
commit 368ae0cb8e
Signed by: bts
GPG Key ID: 8C004C2F93481F6B
2 changed files with 273 additions and 21 deletions

View File

@ -2,19 +2,141 @@ package remap
/*
Map returns a map[string]<match bytes> for regexes with named capture groups matched in bytes b.
Note that this supports non-unique group names; regexp.Regexp allows for patterns with multiple groups
using the same group name. Each match for each group is in a slice keyed under that group name, with
that slice ordered by the indexing done by the regex match itself.
matches and/or its values may be nil or empty under the following condition tree:
matches will be nil if no named capture group matches were found.
IF b is nil:
THEN matches will always be nil
ELSE:
IF all of b does not match pattern
IF mustMatch is true
THEN matches == nil
ELSE
THEN matches == map[string][][]byte{} (non-nil but empty)
ELSE IF pattern has no named capture groups
IF inclNoMatch is true
THEN matches == map[string][][]byte{} (non-nil but empty)
ELSE
THEN matches == nil
ELSE
IF there are no named group matches
IF inclNoMatch is true
THEN matches is non-nil; matches[<group name>, ...] is/are defined but nil (_, ok = matches[<group name>]; ok == true)
ELSE
THEN matches == nil
ELSE
IF <group name> does not have a match
IF inclNoMatch is true
IF inclNoMatchStrict is true
THEN matches[<group name>] is defined and non-nil, but populated with placeholder nils
(matches[<group name>] == [][]byte{nil[, nil...]})
ELSE
THEN matches[<group name>] is guaranteed defined but may be nil (_, ok = matches[<group name>]; ok == true)
ELSE
THEN matches[<group name>] is not defined (_, ok = matches[<group name>]; ok == false)
ELSE
matches[<group name>] == []{<match>[, <match>...]}
*/
func (r *ReMap) Map(b []byte) (matches map[string][]byte) {
func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (matches map[string][][]byte) {
var m [][]byte
var tmpMap map[string][]byte = make(map[string][]byte)
var ok bool
var mIdx int
var match []byte
var grpNm string
var names []string
var matchBytes [][]byte
var tmpMap map[string][][]byte = make(map[string][][]byte)
m = r.Regexp.FindSubmatch(b)
if b == nil {
return
}
for idx, grpNm := range r.Regexp.SubexpNames() {
if idx != 0 && grpNm != "" {
tmpMap[grpNm] = m[idx]
names = r.Regexp.SubexpNames()
matchBytes = r.Regexp.FindSubmatch(b)
if matchBytes == nil {
// b does not match pattern
if !mustMatch {
matches = make(map[string][][]byte)
}
return
}
if names == nil || len(names) == 0 || len(names) == 1 {
/*
no named capture groups;
technically only the last condition would be the case.
*/
if inclNoMatch {
matches = make(map[string][][]byte)
}
return
}
names = names[1:]
if len(matchBytes) == 0 || len(matchBytes) == 1 {
/*
no submatches whatsoever.
*technically* I don't think this condition can actually be reached.
This is more of a safe-return before we re-slice.
*/
matches = make(map[string][][]byte)
if inclNoMatch {
if len(names) >= 1 {
for _, grpNm = range names {
matches[grpNm] = nil
}
}
}
return
}
matchBytes = matchBytes[1:]
for mIdx, match = range matchBytes {
grpNm = names[mIdx]
/*
Thankfully, it's actually a build error if a pattern specifies a named
capture group with an empty name.
So we don't need to worry about accounting for that,
and can just skip over grpNm == "" (which is an *unnamed* capture group).
*/
if grpNm == "" {
continue
}
if match == nil {
// group did not match
if !inclNoMatch {
continue
}
if _, ok = tmpMap[grpNm]; !ok {
if !inclNoMatchStrict {
tmpMap[grpNm] = nil
} else {
tmpMap[grpNm] = [][]byte{nil}
}
} else {
if inclNoMatchStrict {
tmpMap[grpNm] = append(tmpMap[grpNm], nil)
}
}
continue
}
if _, ok = tmpMap[grpNm]; !ok {
tmpMap[grpNm] = make([][]byte, 0)
}
tmpMap[grpNm] = append(tmpMap[grpNm], match)
}
// This *technically* should be completely handled above.
if inclNoMatch {
for _, grpNm = range names {
if _, ok = tmpMap[grpNm]; !ok {
tmpMap[grpNm] = nil
}
}
}
@ -26,20 +148,137 @@ func (r *ReMap) Map(b []byte) (matches map[string][]byte) {
}
/*
MapString returns a map[string]<match string> for regexes with named capture groups matched in string s.
MapString is exactly like ReMap.Map(), but operates on (and returns) strings instead. (matches will always be nil if s == ``.)
matches will be nil if no named capture group matches were found.
A small deviation, though; empty strings instead of nils (because duh) will occupy placeholders (if inclNoMatchStrict is specified).
*/
func (r *ReMap) MapString(s string) (matches map[string]string) {
func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bool) (matches map[string][]string) {
var m []string
var tmpMap map[string]string = make(map[string]string)
var ok bool
var endIdx int
var startIdx int
var chunkIdx int
var grpNm string
var names []string
var matchStr string
var idxChunks [][]int
var matchIndices []int
var chunkIndices []int // always 2 elements; start pos and end pos
var tmpMap map[string][]string = make(map[string][]string)
m = r.Regexp.FindStringSubmatch(s)
/*
OK so this is a bit of a deviation.
for idx, grpNm := range r.Regexp.SubexpNames() {
if idx != 0 && grpNm != "" {
tmpMap[grpNm] = m[idx]
It's not as straightforward as above, because there isn't an explicit way
like above to determine if a pattern was *matched as an empty string* vs.
*not matched*.
So instead do roundabout index-y things.
*/
if s == "" {
return
}
names = r.Regexp.SubexpNames()
matchIndices = r.Regexp.FindStringSubmatchIndex(s)
if matchIndices == nil {
// s does not match pattern
if !mustMatch {
matches = make(map[string][]string)
}
return
}
if names == nil || len(names) == 0 || len(names) == 1 {
/*
no named capture groups;
technically only the last condition would be the case.
*/
if inclNoMatch {
matches = make(map[string][]string)
}
return
}
names = names[1:]
if len(matchIndices) == 0 || len(matchIndices) == 1 {
/*
no submatches whatsoever.
*technically* I don't think this condition can actually be reached.
This is more of a safe-return before we chunk the indices.
*/
matches = make(map[string][]string)
if inclNoMatch {
if len(names) >= 1 {
for _, grpNm = range names {
matches[grpNm] = nil
}
}
}
return
}
/*
The reslice starts at 2 because they're in pairs: []int{<start>, <end>, <start>, <end>, ...}
and the first *pair* is the entire pattern match.
Thus the len(matchIndices) == 2*len(names).
Keep in mind that since the first element of names is removed,
the first pair here is also removed.
*/
matchIndices = matchIndices[2:]
idxChunks = make([][]int, len(names))
for startIdx = 0; startIdx < len(idxChunks); startIdx += 2 {
endIdx = startIdx + 2
grpNm = names[chunkIdx]
/*
Thankfully, it's actually a build error if a pattern specifies a named
capture group with an empty name.
So we don't need to worry about accounting for that,
and can just skip over grpNm == "" (which is an *unnamed* capture group).
*/
if grpNm == "" {
continue
}
// This technically should never happen.
if endIdx > len(matchIndices) {
endIdx = len(matchIndices)
}
chunkIndices = matchIndices[startIdx:endIdx]
if chunkIndices[0] == -1 || chunkIndices[1] == -1 {
// group did not match
if !inclNoMatch {
continue
}
if _, ok = tmpMap[grpNm]; !ok {
if !inclNoMatchStrict {
tmpMap[grpNm] = nil
} else {
tmpMap[grpNm] = []string{""}
}
} else {
if inclNoMatchStrict {
tmpMap[grpNm] = append(tmpMap[grpNm], "")
}
}
continue
}
matchStr = s[chunkIndices[0]:chunkIndices[1]]
if _, ok = tmpMap[grpNm]; !ok {
tmpMap[grpNm] = make([]string, 0)
}
tmpMap[grpNm] = append(tmpMap[grpNm], matchStr)
chunkIdx++
}
// This *technically* should be completely handled above.
if inclNoMatch {
for _, grpNm = range names {
if _, ok = tmpMap[grpNm]; !ok {
tmpMap[grpNm] = nil
}
}
}

View File

@ -4,7 +4,20 @@ import (
`regexp`
)
// ReMap provides some map-related functions around a regexp.Regexp.
type ReMap struct {
*regexp.Regexp
}
// ReMap wraps a regexp.Regexp (embedded by pointer) and layers map-related helper functions on top of it.
type ReMap struct {
	*regexp.Regexp
}

/*
ExplicitStringMatch is the per-group result used with ReMap.MapStringExplicit.

It lets a caller tell apart a capture group that matched (a hit, even when the
matched value is the empty string) from a capture group that did not match at all.
*/
type ExplicitStringMatch struct {
	Group   string // presumably the capture group's name; confirm against ReMap.MapStringExplicit
	IsMatch bool   // whether the group was a hit, as opposed to not matching
	Value   string // the group's match value; may be an empty string even for a hit
}