FIXED:
* paths: Async searching works correctly now, and is consolidated to a
  single struct for searching options for async and synchronous
  searches.
This commit is contained in:
brent saner
2024-11-18 17:36:14 -05:00
parent eefe02afaf
commit c6efc2d83c
6 changed files with 383 additions and 187 deletions

View File

@@ -26,8 +26,7 @@ import (
"os"
"os/user"
"path/filepath"
`regexp`
`slices`
`sort`
"strings"
`sync`
`time`
@@ -35,7 +34,6 @@ import (
// "syscall"
`github.com/djherbis/times`
`golang.org/x/sync/semaphore`
`r00t2.io/goutils/bitmask`
)
@@ -277,86 +275,33 @@ func RealPathExistsStat(path *string) (exists bool, stat os.FileInfo, err error)
return
}
/*
SearchPaths gets a file/directory path list based on the provided criteria.
// SearchFsPaths gets a file/directory/etc. path list based on the provided criteria.
func SearchFsPaths(matcher FsSearchCriteria) (found, miss []*FsSearchResult, err error) {
targetType defines what should be included in the path list.
It can consist of one or more (io/)fs.FileMode types OR'd together
(ensure they are part of (io/)fs.ModeType).
(You can use 0 to match regular files explicitly, and/or noFiles = true to exclude them.)
var matched *FsSearchResult
var missed *FsSearchResult
noFiles, if true, will explicitly filter out regular files from the path results.
(Normally they are *always* included regardless of targetType.)
basePtrn may be nil; if it isn't, it will be applied to *base names*
(that is, quux.txt rather than /foo/bar/baz/quux.txt).
pathPtrn is like basePtrn except it applies to the *entire* path,
not just the basename, if not nil (e.g. /foo/bar/baz/quux.txt,
not just quux.txt).
If age is not nil, it will be applied to the path object.
It will match older files/directories/etc. if olderThan is true,
otherwise it will match newer files/directories/etc.
(olderThan is not used otherwise.)
ageType is one or more Time* constants OR'd together to describe which timestamp type to check.
(Note that TimeCreated may not match if specified as it is only available on certain OSes,
kernel versions, and filesystems. This may lead to files being excluded that may have otherwise
been included.)
(You can use TimeAny to specify any supported time.)
*Any* matching timestamp of all specified (and supported) timestamp types matches,
so be judicious with your selection. They are processed in order of:
* btime (birth/creation time) (if supported)
* mtime (modification time -- contents have changed)
* ctime (OS-specific behavior; generally disk metadata has changed) (if supported)
* atime (access time)
olderThan (as mentioned above) will find paths *older* than age if true, otherwise *newer*.
now, if not nil, will be used to compare the age of files. (If nil, it will be populated at time of call.)
*/
func SearchFsPaths(
root string,
targetType fs.FileMode, noFiles bool,
basePtrn, pathPtrn *regexp.Regexp,
age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
) (foundPaths []string, err error) {
if age != nil {
if now == nil {
now = new(time.Time)
*now = time.Now()
}
}
if err = RealPath(&root); err != nil {
if err = RealPath(&matcher.Root); err != nil {
return
}
if err = filepath.WalkDir(
root,
matcher.Root,
func(path string, d fs.DirEntry, inErr error) (outErr error) {
var include bool
if inErr != nil {
outErr = inErr
return
}
if include, outErr = filterPath(
path, d,
targetType, noFiles,
basePtrn, pathPtrn,
age, ageType, olderThan, now,
); outErr != nil {
if matched, missed, outErr = matcher.Match(path, d, nil); outErr != nil {
return
}
if include {
foundPaths = append(foundPaths, path)
if matched != nil && !matcher.NoMatch {
found = append(found, matched)
}
if missed != nil && !matcher.NoMismatch {
miss = append(miss, missed)
}
return
@@ -365,8 +310,18 @@ func SearchFsPaths(
return
}
if found == nil || len(found) == 0 {
return
}
// And sort them.
slices.Sort(foundPaths)
sort.Slice(
found,
func(i, j int) (isLess bool) {
isLess = found[i].Path < found[j].Path
return
},
)
return
}
@@ -375,162 +330,107 @@ func SearchFsPaths(
SearchFsPathsAsync is exactly like SearchFsPaths, but dispatches off concurrent
workers for the filtering logic instead of performing iteratively/recursively.
It may, in some cases, be *slightly more* performant and *slightly less* in others.
Additional options are documented below.
Note that unlike SearchFsPaths, the results written to foundPathsChan are not
guaranteed to be in any predictable order.
Note that unlike SearchFsPaths, the results written to the
FsSearchCriteriaAsync.ResChan are not guaranteed to be in any predictable order.
All channels are expected to have been initialized by the caller ahead of time,
and all provided channels will be closed upon completion (so they are only safe
to READ from after invoking SearchFsPathsAsync).
foundPathsChan is a channel to which matched filepaths will be written.
sem/semCtx are optional; if not nil, they can be used to limit/"batch" concurrent tasks.
(semCtx is the context.Context used for sem when acquiring. It may be nil;
one will be locally created if so.)
The default will be to spawn all filtering logic concurrently.
For very large directories, you almost assuredly do not want that -- it
can cause a significant amount of I/O and CPU wait.
(See https://pkg.go.dev/golang.org/x/sync/semaphore for details.)
wg *must not* be nil, and must be managed by the caller.
SearchFsPathsAsync will exit with no errors but no-op if wg is nil.
errChan will receive any/all encountered errors.
All channels are expected to have already been initialized by the caller.
They will not be closed by this function.
*/
func SearchFsPathsAsync(
root string,
targetType fs.FileMode, noFiles bool,
basePtrn, pathPtrn *regexp.Regexp,
age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
foundPathsChan chan string,
sem *semaphore.Weighted, semCtx context.Context,
wg *sync.WaitGroup,
errChan chan error,
) {
func SearchFsPathsAsync(matcher FsSearchCriteriaAsync) {
var err error
var localWg sync.WaitGroup
var wgLocal sync.WaitGroup
var doneChan chan bool = make(chan bool, 1)
if wg == nil {
if matcher.ErrChan == nil {
panic(ErrNilErrChan)
return
}
if age != nil {
if now == nil {
now = new(time.Time)
*now = time.Now()
}
if matcher.WG == nil {
matcher.ErrChan <- ErrNilWg
return
}
if sem != nil && semCtx == nil {
semCtx = context.Background()
defer matcher.WG.Done()
if matcher.ResChan == nil && !matcher.NoMatch {
matcher.ErrChan <- ErrNilMatchChan
return
}
if matcher.MismatchChan == nil && !matcher.NoMismatch {
matcher.ErrChan <- ErrNilMismatchChan
return
}
if err = RealPath(&matcher.Root); err != nil {
matcher.ErrChan <- err
return
}
if matcher.Semaphore != nil && matcher.SemaphoreCtx == nil {
matcher.SemaphoreCtx = context.Background()
}
if err = filepath.WalkDir(
root,
matcher.Root,
func(path string, de fs.DirEntry, inErr error) (outErr error) {
localWg.Add(1)
wg.Add(1)
if sem != nil {
if err = sem.Acquire(semCtx, 1); err != nil {
if inErr != nil {
inErr = filterNoFileDir(inErr)
if inErr != nil {
outErr = inErr
return
}
}
wgLocal.Add(1)
if matcher.Semaphore != nil {
if err = matcher.Semaphore.Acquire(matcher.SemaphoreCtx, 1); err != nil {
return
}
}
go func(p string, d fs.DirEntry) {
var pErr error
var pInclude bool
var pResMatch *FsSearchResult
var pResMiss *FsSearchResult
defer localWg.Done()
defer wg.Done()
defer wgLocal.Done()
if sem != nil {
defer sem.Release(1)
if matcher.Semaphore != nil {
defer matcher.Semaphore.Release(1)
}
if pInclude, pErr = filterPath(p, d, targetType, noFiles, basePtrn, pathPtrn, age, ageType, olderThan, now); pErr != nil {
errChan <- pErr
if pResMatch, pResMiss, pErr = matcher.Match(p, d, nil); pErr != nil {
matcher.ErrChan <- pErr
return
}
if pInclude {
foundPathsChan <- p
if pResMatch != nil && !matcher.NoMatch {
matcher.ResChan <- pResMatch
}
if pResMiss != nil && !matcher.NoMismatch {
matcher.MismatchChan <- pResMiss
}
}(path, de)
return
},
); err != nil {
errChan <- err
return
err = filterNoFileDir(err)
if err != nil {
matcher.ErrChan <- err
return
}
}
go func() {
localWg.Wait()
close(foundPathsChan)
close(errChan)
wgLocal.Wait()
doneChan <- true
}()
return
}
// filterPath applies the filter logic used by SearchFSPaths and SearchFsPathsAync.
func filterPath(
path string, d fs.DirEntry,
targetType fs.FileMode, noFiles bool,
basePtrn, pathPtrn *regexp.Regexp,
age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
) (include bool, err error) {
var typeMode fs.FileMode
var fi fs.FileInfo
var tspec times.Timespec
var typeFilter *bitmask.MaskBit = bitmask.NewMaskBitExplicit(uint(targetType))
if age != nil {
if now == nil {
now = new(time.Time)
*now = time.Now()
}
}
// patterns
if pathPtrn != nil {
if !pathPtrn.MatchString(path) {
return
}
}
if basePtrn != nil {
if !basePtrn.MatchString(filepath.Base(path)) {
return
}
}
// age
if age != nil {
if tspec, err = times.Stat(path); err != nil {
return
}
if !filterTimes(tspec, age, &ageType, olderThan, now) {
return
}
}
// fs object type (file, dir, etc.)
if fi, err = d.Info(); err != nil {
return
}
typeMode = fi.Mode().Type()
if typeMode == 0 && noFiles {
return
} else if typeMode != 0 {
if !typeFilter.HasFlag(bitmask.MaskBit(typeMode)) {
return
}
}
include = true
<-doneChan
return
}
@@ -597,3 +497,13 @@ func filterTimes(tspec times.Timespec, age *time.Duration, ageType *pathTimeType
return
}
func filterNoFileDir(err error) (filtered error) {
filtered = err
if errors.Is(err, fs.ErrNotExist) {
filtered = nil
}
return
}