v1.12.0
FIXED: * paths: Async searching now works correctly, and the search options for both async and synchronous searches are consolidated into a single struct.
This commit is contained in:
284
paths/funcs.go
284
paths/funcs.go
@@ -26,8 +26,7 @@ import (
|
||||
"os"
|
||||
"os/user"
|
||||
"path/filepath"
|
||||
`regexp`
|
||||
`slices`
|
||||
`sort`
|
||||
"strings"
|
||||
`sync`
|
||||
`time`
|
||||
@@ -35,7 +34,6 @@ import (
|
||||
// "syscall"
|
||||
|
||||
`github.com/djherbis/times`
|
||||
`golang.org/x/sync/semaphore`
|
||||
`r00t2.io/goutils/bitmask`
|
||||
)
|
||||
|
||||
@@ -277,86 +275,33 @@ func RealPathExistsStat(path *string) (exists bool, stat os.FileInfo, err error)
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
SearchPaths gets a file/directory path list based on the provided criteria.
|
||||
// SearchFsPaths gets a file/directory/etc. path list based on the provided criteria.
|
||||
func SearchFsPaths(matcher FsSearchCriteria) (found, miss []*FsSearchResult, err error) {
|
||||
|
||||
targetType defines what should be included in the path list.
|
||||
It can consist of one or more (io/)fs.FileMode types OR'd together
|
||||
(ensure they are part of (io/)fs.ModeType).
|
||||
(You can use 0 to match regular files explicitly, and/or noFiles = true to exclude them.)
|
||||
var matched *FsSearchResult
|
||||
var missed *FsSearchResult
|
||||
|
||||
noFiles, if true, will explicitly filter out regular files from the path results.
|
||||
(Normally they are *always* included regardless of targetType.)
|
||||
|
||||
basePtrn may be nil; if it isn't, it will be applied to *base names*
|
||||
(that is, quux.txt rather than /foo/bar/baz/quux.txt).
|
||||
|
||||
pathPtrn is like basePtrn except it applies to the *entire* path,
|
||||
not just the basename, if not nil (e.g. /foo/bar/baz/quux.txt,
|
||||
not just quux.txt).
|
||||
|
||||
If age is not nil, it will be applied to the path object.
|
||||
It will match older files/directories/etc. if olderThan is true,
|
||||
otherwise it will match newer files/directories/etc.
|
||||
(olderThan is not used otherwise.)
|
||||
|
||||
ageType is one or more Time* constants OR'd together to describe which timestamp type to check.
|
||||
(Note that TimeCreated may not match if specified as it is only available on certain OSes,
|
||||
kernel versions, and filesystems. This may lead to files being excluded that may have otherwise
|
||||
been included.)
|
||||
(You can use TimeAny to specify any supported time.)
|
||||
*Any* matching timestamp of all specified (and supported) timestamp types matches,
|
||||
so be judicious with your selection. They are processed in order of:
|
||||
|
||||
* btime (birth/creation time) (if supported)
|
||||
* mtime (modification time -- contents have changed)
|
||||
* ctime (OS-specific behavior; generally disk metadata has changed) (if supported)
|
||||
* atime (access time)
|
||||
|
||||
olderThan (as mentioned above) will find paths *older* than age if true, otherwise *newer*.
|
||||
|
||||
now, if not nil, will be used to compare the age of files. (If nil, it will be populated at time of call.)
|
||||
*/
|
||||
func SearchFsPaths(
|
||||
root string,
|
||||
targetType fs.FileMode, noFiles bool,
|
||||
basePtrn, pathPtrn *regexp.Regexp,
|
||||
age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
|
||||
) (foundPaths []string, err error) {
|
||||
|
||||
if age != nil {
|
||||
if now == nil {
|
||||
now = new(time.Time)
|
||||
*now = time.Now()
|
||||
}
|
||||
}
|
||||
|
||||
if err = RealPath(&root); err != nil {
|
||||
if err = RealPath(&matcher.Root); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if err = filepath.WalkDir(
|
||||
root,
|
||||
matcher.Root,
|
||||
func(path string, d fs.DirEntry, inErr error) (outErr error) {
|
||||
|
||||
var include bool
|
||||
|
||||
if inErr != nil {
|
||||
outErr = inErr
|
||||
return
|
||||
}
|
||||
|
||||
if include, outErr = filterPath(
|
||||
path, d,
|
||||
targetType, noFiles,
|
||||
basePtrn, pathPtrn,
|
||||
age, ageType, olderThan, now,
|
||||
); outErr != nil {
|
||||
if matched, missed, outErr = matcher.Match(path, d, nil); outErr != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if include {
|
||||
foundPaths = append(foundPaths, path)
|
||||
if matched != nil && !matcher.NoMatch {
|
||||
found = append(found, matched)
|
||||
}
|
||||
if missed != nil && !matcher.NoMismatch {
|
||||
miss = append(miss, missed)
|
||||
}
|
||||
|
||||
return
|
||||
@@ -365,8 +310,18 @@ func SearchFsPaths(
|
||||
return
|
||||
}
|
||||
|
||||
if found == nil || len(found) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// And sort them.
|
||||
slices.Sort(foundPaths)
|
||||
sort.Slice(
|
||||
found,
|
||||
func(i, j int) (isLess bool) {
|
||||
isLess = found[i].Path < found[j].Path
|
||||
return
|
||||
},
|
||||
)
|
||||
|
||||
return
|
||||
}
|
||||
@@ -375,162 +330,107 @@ func SearchFsPaths(
|
||||
SearchFsPathsAsync is exactly like SearchFsPaths, but dispatches off concurrent
|
||||
workers for the filtering logic instead of performing iteratively/recursively.
|
||||
It may, in some cases, be *slightly more* performant and *slightly less* in others.
|
||||
Additional options are documented below.
|
||||
Note that unlike SearchFsPaths, the results written to foundPathsChan are not
|
||||
guaranteed to be in any predictable order.
|
||||
Note that unlike SearchFsPaths, the results written to the
|
||||
FsSearchCriteriaAsync.ResChan are not guaranteed to be in any predictable order.
|
||||
|
||||
All channels are expected to have been initialized by the caller ahead of time,
|
||||
and all provided channels will be closed upon completion (so they are only safe
|
||||
to READ from after invoking SearchFsPathsAsync).
|
||||
|
||||
foundPathsChan is a channel to which matched filepaths will be written.
|
||||
|
||||
sem/semCtx are optional; if not nil, they can be used to limit/"batch" concurrent tasks.
|
||||
(semCtx is the context.Context used for sem when acquiring. It may be nil;
|
||||
one will be locally created if so.)
|
||||
The default will be to spawn all filtering logic concurrently.
|
||||
For very large directories, you almost assuredly do not want that -- it
|
||||
can cause a significant amount of I/O and CPU wait.
|
||||
(See https://pkg.go.dev/golang.org/x/sync/semaphore for details.)
|
||||
|
||||
wg *must not* be nil, and must be managed by the caller.
|
||||
SearchFsPathsAsync will exit with no errors but no-op if wg is nil.
|
||||
|
||||
errChan will receive any/all encountered errors.
|
||||
All channels are expected to have already been initialized by the caller.
|
||||
They will not be closed by this function.
|
||||
*/
|
||||
func SearchFsPathsAsync(
|
||||
root string,
|
||||
targetType fs.FileMode, noFiles bool,
|
||||
basePtrn, pathPtrn *regexp.Regexp,
|
||||
age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
|
||||
foundPathsChan chan string,
|
||||
sem *semaphore.Weighted, semCtx context.Context,
|
||||
wg *sync.WaitGroup,
|
||||
errChan chan error,
|
||||
) {
|
||||
func SearchFsPathsAsync(matcher FsSearchCriteriaAsync) {
|
||||
|
||||
var err error
|
||||
var localWg sync.WaitGroup
|
||||
var wgLocal sync.WaitGroup
|
||||
var doneChan chan bool = make(chan bool, 1)
|
||||
|
||||
if wg == nil {
|
||||
if matcher.ErrChan == nil {
|
||||
panic(ErrNilErrChan)
|
||||
return
|
||||
}
|
||||
|
||||
if age != nil {
|
||||
if now == nil {
|
||||
now = new(time.Time)
|
||||
*now = time.Now()
|
||||
}
|
||||
if matcher.WG == nil {
|
||||
matcher.ErrChan <- ErrNilWg
|
||||
return
|
||||
}
|
||||
|
||||
if sem != nil && semCtx == nil {
|
||||
semCtx = context.Background()
|
||||
defer matcher.WG.Done()
|
||||
|
||||
if matcher.ResChan == nil && !matcher.NoMatch {
|
||||
matcher.ErrChan <- ErrNilMatchChan
|
||||
return
|
||||
}
|
||||
if matcher.MismatchChan == nil && !matcher.NoMismatch {
|
||||
matcher.ErrChan <- ErrNilMismatchChan
|
||||
return
|
||||
}
|
||||
|
||||
if err = RealPath(&matcher.Root); err != nil {
|
||||
matcher.ErrChan <- err
|
||||
return
|
||||
}
|
||||
|
||||
if matcher.Semaphore != nil && matcher.SemaphoreCtx == nil {
|
||||
matcher.SemaphoreCtx = context.Background()
|
||||
}
|
||||
|
||||
if err = filepath.WalkDir(
|
||||
root,
|
||||
matcher.Root,
|
||||
func(path string, de fs.DirEntry, inErr error) (outErr error) {
|
||||
localWg.Add(1)
|
||||
wg.Add(1)
|
||||
if sem != nil {
|
||||
if err = sem.Acquire(semCtx, 1); err != nil {
|
||||
|
||||
if inErr != nil {
|
||||
inErr = filterNoFileDir(inErr)
|
||||
if inErr != nil {
|
||||
outErr = inErr
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
wgLocal.Add(1)
|
||||
if matcher.Semaphore != nil {
|
||||
if err = matcher.Semaphore.Acquire(matcher.SemaphoreCtx, 1); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
go func(p string, d fs.DirEntry) {
|
||||
var pErr error
|
||||
var pInclude bool
|
||||
var pResMatch *FsSearchResult
|
||||
var pResMiss *FsSearchResult
|
||||
|
||||
defer localWg.Done()
|
||||
defer wg.Done()
|
||||
defer wgLocal.Done()
|
||||
|
||||
if sem != nil {
|
||||
defer sem.Release(1)
|
||||
if matcher.Semaphore != nil {
|
||||
defer matcher.Semaphore.Release(1)
|
||||
}
|
||||
|
||||
if pInclude, pErr = filterPath(p, d, targetType, noFiles, basePtrn, pathPtrn, age, ageType, olderThan, now); pErr != nil {
|
||||
errChan <- pErr
|
||||
if pResMatch, pResMiss, pErr = matcher.Match(p, d, nil); pErr != nil {
|
||||
matcher.ErrChan <- pErr
|
||||
return
|
||||
}
|
||||
|
||||
if pInclude {
|
||||
foundPathsChan <- p
|
||||
if pResMatch != nil && !matcher.NoMatch {
|
||||
matcher.ResChan <- pResMatch
|
||||
}
|
||||
if pResMiss != nil && !matcher.NoMismatch {
|
||||
matcher.MismatchChan <- pResMiss
|
||||
}
|
||||
}(path, de)
|
||||
|
||||
return
|
||||
},
|
||||
); err != nil {
|
||||
errChan <- err
|
||||
return
|
||||
err = filterNoFileDir(err)
|
||||
if err != nil {
|
||||
matcher.ErrChan <- err
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
go func() {
|
||||
localWg.Wait()
|
||||
close(foundPathsChan)
|
||||
close(errChan)
|
||||
wgLocal.Wait()
|
||||
doneChan <- true
|
||||
}()
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// filterPath applies the filter logic used by SearchFsPaths and SearchFsPathsAsync.
|
||||
func filterPath(
|
||||
path string, d fs.DirEntry,
|
||||
targetType fs.FileMode, noFiles bool,
|
||||
basePtrn, pathPtrn *regexp.Regexp,
|
||||
age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
|
||||
) (include bool, err error) {
|
||||
|
||||
var typeMode fs.FileMode
|
||||
var fi fs.FileInfo
|
||||
var tspec times.Timespec
|
||||
var typeFilter *bitmask.MaskBit = bitmask.NewMaskBitExplicit(uint(targetType))
|
||||
|
||||
if age != nil {
|
||||
if now == nil {
|
||||
now = new(time.Time)
|
||||
*now = time.Now()
|
||||
}
|
||||
}
|
||||
|
||||
// patterns
|
||||
if pathPtrn != nil {
|
||||
if !pathPtrn.MatchString(path) {
|
||||
return
|
||||
}
|
||||
}
|
||||
if basePtrn != nil {
|
||||
if !basePtrn.MatchString(filepath.Base(path)) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// age
|
||||
if age != nil {
|
||||
if tspec, err = times.Stat(path); err != nil {
|
||||
return
|
||||
}
|
||||
if !filterTimes(tspec, age, &ageType, olderThan, now) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// fs object type (file, dir, etc.)
|
||||
if fi, err = d.Info(); err != nil {
|
||||
return
|
||||
}
|
||||
typeMode = fi.Mode().Type()
|
||||
if typeMode == 0 && noFiles {
|
||||
return
|
||||
} else if typeMode != 0 {
|
||||
if !typeFilter.HasFlag(bitmask.MaskBit(typeMode)) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
include = true
|
||||
<-doneChan
|
||||
|
||||
return
|
||||
}
|
||||
@@ -597,3 +497,13 @@ func filterTimes(tspec times.Timespec, age *time.Duration, ageType *pathTimeType
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// filterNoFileDir suppresses "does not exist" errors.
//
// It returns nil when err matches fs.ErrNotExist (as determined by errors.Is,
// so wrapped errors are matched as well); any other error, including nil,
// is returned unchanged.
func filterNoFileDir(err error) (filtered error) {

	// Only pass the error through when it is NOT a "not exist" error;
	// otherwise filtered keeps its zero value (nil).
	if !errors.Is(err, fs.ErrNotExist) {
		filtered = err
	}

	return
}
|
||||
|
||||
Reference in New Issue
Block a user