ADDED:
* fsutils: better/additional fsattrs functionality
* paths: highly filterable filesystem searching
This commit is contained in:
brent saner
2024-11-16 01:28:24 -05:00
parent b82f0c02ed
commit eefe02afaf
11 changed files with 384 additions and 92 deletions

View File

@@ -19,6 +19,7 @@
package paths
import (
`context`
"errors"
"fmt"
"io/fs"
@@ -28,11 +29,13 @@ import (
`regexp`
`slices`
"strings"
`sync`
`time`
// "syscall"
`github.com/djherbis/times`
`golang.org/x/sync/semaphore`
`r00t2.io/goutils/bitmask`
)
@@ -311,15 +314,22 @@ func RealPathExistsStat(path *string) (exists bool, stat os.FileInfo, err error)
* atime (access time)
olderThan (as mentioned above) will find paths *older* than age if true, otherwise *newer*.
now, if not nil, will be used to compare the age of files. (If nil, it will be populated at time of call.)
*/
func SearchFsPaths(
root string,
targetType fs.FileMode, noFiles bool,
basePtrn, pathPtrn *regexp.Regexp,
age *time.Duration, ageType pathTimeType, olderThan bool,
age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
) (foundPaths []string, err error) {
var now time.Time = time.Now()
if age != nil {
if now == nil {
now = new(time.Time)
*now = time.Now()
}
}
if err = RealPath(&root); err != nil {
return
@@ -329,52 +339,25 @@ func SearchFsPaths(
root,
func(path string, d fs.DirEntry, inErr error) (outErr error) {
var typeMode fs.FileMode
var fi fs.FileInfo
var tspec times.Timespec
var typeFilter *bitmask.MaskBit = bitmask.NewMaskBitExplicit(uint(targetType))
var include bool
if inErr != nil {
outErr = inErr
return
}
// patterns
if pathPtrn != nil {
if !pathPtrn.MatchString(path) {
return
}
}
if basePtrn != nil {
if !basePtrn.MatchString(filepath.Base(path)) {
return
}
}
// age
if age != nil {
if tspec, outErr = times.Stat(path); outErr != nil {
return
}
if !filterTimes(tspec, age, &ageType, olderThan, &now) {
return
}
}
// fs object type (file, dir, etc.)
if fi, outErr = d.Info(); outErr != nil {
return
}
typeMode = fi.Mode().Type()
if typeMode == 0 && noFiles {
return
}
if !typeFilter.HasFlag(bitmask.MaskBit(typeMode)) {
if include, outErr = filterPath(
path, d,
targetType, noFiles,
basePtrn, pathPtrn,
age, ageType, olderThan, now,
); outErr != nil {
return
}
// All filters passed at this point.
foundPaths = append(foundPaths, path)
if include {
foundPaths = append(foundPaths, path)
}
return
},
@@ -388,6 +371,170 @@ func SearchFsPaths(
return
}
/*
SearchFsPathsAsync is like SearchFsPaths, but dispatches a concurrent worker
per walked path for the filtering logic instead of filtering inline during the walk.
It may, in some cases, be *slightly more* performant and *slightly less* in others.

Additional options are documented below.

Note that unlike SearchFsPaths, the results written to foundPathsChan are not
guaranteed to be in any predictable order.

All channels are expected to have been initialized by the caller ahead of time,
and all provided channels will be closed upon completion (so they are only safe
to READ from after invoking SearchFsPathsAsync). They are closed once every
dispatched worker has finished, whether or not the walk itself failed.

foundPathsChan is a channel to which matched filepaths will be written.

sem/semCtx are optional; if not nil, they can be used to limit/"batch" concurrent tasks.
(semCtx is the context.Context used for sem when acquiring. It may be nil;
one will be locally created if so.)
The default will be to spawn all filtering logic concurrently.
For very large directories, you almost assuredly do not want that -- it
can cause a significant amount of I/O and CPU wait.
(See https://pkg.go.dev/golang.org/x/sync/semaphore for details.)
If acquiring sem fails (e.g. semCtx is done), the walk is aborted and the
acquisition error is sent to errChan.

wg *must not* be nil, and must be managed by the caller.
SearchFsPathsAsync will exit with no errors but no-op if wg is nil
(in which case no channels are closed either).
wg is incremented once per dispatched worker and decremented when that worker finishes.

errChan will receive any/all encountered errors: filter errors from workers,
and the error (if any) that aborted the walk. As in SearchFsPaths, an error
reported by the directory walk itself aborts the walk.
*/
func SearchFsPathsAsync(
	root string,
	targetType fs.FileMode, noFiles bool,
	basePtrn, pathPtrn *regexp.Regexp,
	age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
	foundPathsChan chan string,
	sem *semaphore.Weighted, semCtx context.Context,
	wg *sync.WaitGroup,
	errChan chan error,
) {

	var err error
	var localWg sync.WaitGroup

	if wg == nil {
		return
	}

	// Resolve the reference time once, up front, so every worker compares
	// against the same instant and none of them needs to write to now.
	if age != nil {
		if now == nil {
			now = new(time.Time)
			*now = time.Now()
		}
	}

	if sem != nil && semCtx == nil {
		semCtx = context.Background()
	}

	err = filepath.WalkDir(
		root,
		func(path string, de fs.DirEntry, inErr error) (outErr error) {

			// When the walk reports an error, de may be nil (e.g. root could
			// not be lstat'd); never hand that to a worker. Abort the walk
			// instead, matching SearchFsPaths.
			if inErr != nil {
				outErr = inErr
				return
			}

			// Acquire *before* registering with the wait groups: if this
			// fails, no worker is started and there is nothing to Done().
			if sem != nil {
				if outErr = sem.Acquire(semCtx, 1); outErr != nil {
					return
				}
			}

			localWg.Add(1)
			wg.Add(1)

			go func(p string, d fs.DirEntry) {

				var pErr error
				var pInclude bool

				defer localWg.Done()
				defer wg.Done()

				if sem != nil {
					defer sem.Release(1)
				}

				if pInclude, pErr = filterPath(p, d, targetType, noFiles, basePtrn, pathPtrn, age, ageType, olderThan, now); pErr != nil {
					errChan <- pErr
					return
				}

				if pInclude {
					foundPathsChan <- p
				}
			}(path, de)

			return
		},
	)

	// Always close the channels once all dispatched workers are done, even if
	// the walk failed part-way; otherwise readers ranging over them hang.
	go func(walkErr error) {
		if walkErr != nil {
			errChan <- walkErr
		}
		localWg.Wait()
		close(foundPathsChan)
		close(errChan)
	}(err)

	return
}
/*
filterPath applies the filter logic used by SearchFsPaths and SearchFsPathsAsync
to a single walked path.

The checks run in this order, and the first one that fails yields include == false:
  - pathPtrn (if not nil) must match the full path
  - basePtrn (if not nil) must match the path's base name
  - if age is not nil, the path's timestamps must pass filterTimes
  - the filesystem object type must be accepted by targetType/noFiles

err is only non-nil if reading the path's timestamps or d.Info() failed.
*/
func filterPath(
	path string, d fs.DirEntry,
	targetType fs.FileMode, noFiles bool,
	basePtrn, pathPtrn *regexp.Regexp,
	age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
) (include bool, err error) {

	var (
		info      fs.FileInfo
		objType   fs.FileMode
		stamps    times.Timespec
		wantTypes *bitmask.MaskBit = bitmask.NewMaskBitExplicit(uint(targetType))
	)

	// Only age filtering needs a reference time; default it to "right now".
	if age != nil && now == nil {
		now = new(time.Time)
		*now = time.Now()
	}

	// Pattern filters.
	if pathPtrn != nil && !pathPtrn.MatchString(path) {
		return
	}
	if basePtrn != nil && !basePtrn.MatchString(filepath.Base(path)) {
		return
	}

	// Age filter.
	if age != nil {
		if stamps, err = times.Stat(path); err != nil {
			return
		}
		if !filterTimes(stamps, age, &ageType, olderThan, now) {
			return
		}
	}

	// Filesystem object type filter (file, dir, etc.).
	if info, err = d.Info(); err != nil {
		return
	}
	objType = info.Mode().Type()

	switch {
	case objType == 0:
		// A zero type is a regular file; those are governed solely by noFiles.
		if noFiles {
			return
		}
	case !wantTypes.HasFlag(bitmask.MaskBit(objType)):
		return
	}

	include = true

	return
}
/*
filterTimes checks a times.Timespec of a file using:
* an age specified by the caller