You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
844 lines
26 KiB
Python
844 lines
26 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf8 -*-
|
|
|
|
# ____ ____ __ ____ __ _ ____ __ _ _ ____ __ __ _ ____
|
|
# ( __)( _ \( )( __)( ( \( \( ) ( \/ )___( __)( )( ( \( \
|
|
# ) _) ) / )( ) _) / / ) D (/ (_/\ ) /(___)) _) )( / / ) D (
|
|
# (__) (__\_)(__)(____)\_)__)(____/\____/(__/ (__) (__)\_)__)(____/
|
|
#
|
|
# The friendlier file finder.
|
|
|
|
import time
|
|
import os
|
|
import optparse
|
|
import string
|
|
import sys
|
|
import re
|
|
from optparse import OptionParser, OptionGroup
|
|
|
|
|
|
# Constants -------------------------------------------------------------------
|
|
CASE_SENSITIVE = 1
|
|
CASE_INSENSITIVE = 2
|
|
CASE_SMART = 3
|
|
|
|
BYTE = 1
|
|
KILOBYTE = 1024 * BYTE
|
|
MEGABYTE = 1024 * KILOBYTE
|
|
GIGABYTE = 1024 * MEGABYTE
|
|
TERABYTE = 1024 * GIGABYTE
|
|
PETABYTE = 1024 * TERABYTE
|
|
|
|
VCS_DIRS = ['.hg', '.git', '.svn']
|
|
|
|
TYPE_FILE_REAL = 1
|
|
TYPE_FILE_SYMLINK = 2
|
|
TYPE_DIR_REAL = 3
|
|
TYPE_DIR_SYMLINK = 4
|
|
|
|
TYPES_FILE_REAL = set([TYPE_FILE_REAL])
|
|
TYPES_FILE_SYMLINK = set([TYPE_FILE_SYMLINK])
|
|
TYPES_DIR_REAL = set([TYPE_DIR_REAL])
|
|
TYPES_DIR_SYMLINK = set([TYPE_DIR_SYMLINK])
|
|
|
|
TYPES_FILE = TYPES_FILE_REAL | TYPES_FILE_SYMLINK
|
|
TYPES_DIR = TYPES_DIR_REAL | TYPES_DIR_SYMLINK
|
|
|
|
TYPES_REAL = TYPES_FILE_REAL | TYPES_DIR_REAL
|
|
TYPES_SYMLINK = TYPES_FILE_SYMLINK | TYPES_DIR_SYMLINK
|
|
|
|
TYPES_ALL = TYPES_FILE | TYPES_DIR
|
|
|
|
SECOND = 1
|
|
MINUTE = 60 * SECOND
|
|
HOUR = 60 * MINUTE
|
|
DAY = 24 * HOUR
|
|
WEEK = 7 * DAY
|
|
MONTH = 30 * DAY
|
|
YEAR = int(365.2425 * DAY)
|
|
|
|
IGNORE_SYNTAX_REGEX = 1
|
|
IGNORE_SYNTAX_GLOB = 2
|
|
IGNORE_SYNTAX_LITERAL = 3
|
|
|
|
IGNORE_MODE_RESTRICTED = 1
|
|
IGNORE_MODE_SEMI = 2
|
|
IGNORE_MODE_UNRESTRICTED = 3
|
|
IGNORE_MODE_ALL = 4
|
|
|
|
|
|
# Regexes ---------------------------------------------------------------------
|
|
SIZE_RE = re.compile(r'^(\d+(?:\.\d+)?)([bkmgtp])?[a-z]*$', re.IGNORECASE)
|
|
|
|
AGO_RE = re.compile(r'''
|
|
(\d+(?:\.\d+)?) # The number (float/int)
|
|
\s* # Optional whitespace
|
|
( # Units
|
|
y(?:ears?)? # y/year/years
|
|
| mos?(?:nths?)? # mo/mos/month/months
|
|
| w(?:eeks?)? # w/week/weeks
|
|
| d(?:ays?)? # d/day/days
|
|
| h(?:ours?)? # h/hour/hours
|
|
| m(?:ins?(?:utes?)?)? # m/min/mins/minute/minutes
|
|
| s(?:ecs?(?:onds?)?)? # s/sec/secs/second/seconds
|
|
)
|
|
''', re.VERBOSE | re.IGNORECASE)
|
|
|
|
IGNORE_SYNTAX_RE = re.compile(r'^\s*syntax:\s*(glob|regexp|regex|re|literal)\s*$',
|
|
re.IGNORECASE)
|
|
IGNORE_COMMENT_RE = re.compile(r'^\s*#')
|
|
IGNORE_BLANK_RE = re.compile(r'^\s*$')
|
|
|
|
GITIGNORE_COMMENT_RE = re.compile(r'^\s*#')
|
|
GITIGNORE_BLANK_RE = re.compile(r'^\s*$')
|
|
GITIGNORE_NEGATE_RE = re.compile(r'^\s*!')
|
|
|
|
HGIGNORE_SYNTAX_RE = re.compile(r'^\s*syntax:\s*(glob|regexp|re)\s*$',
|
|
re.IGNORECASE)
|
|
HGIGNORE_COMMENT_RE = re.compile(r'^\s*#')
|
|
HGIGNORE_BLANK_RE = re.compile(r'^\s*$')
|
|
|
|
|
|
# Global Options --------------------------------------------------------------
|
|
# (it's a prototype, shut up)
|
|
options = None
|
|
|
|
|
|
# Output ----------------------------------------------------------------------
|
|
def out(s, line_ending='\n'):
    """Write s to stdout, terminated by line_ending (default newline)."""
    sys.stdout.write('%s%s' % (s, line_ending))
|
|
|
|
def err(s):
    """Write s to stderr with a trailing newline."""
    sys.stderr.write('%s\n' % s)
|
|
|
|
def die(s, exitcode=1):
    """Report an error to stderr and terminate with the given exit code."""
    err('error: %s' % s)
    sys.exit(exitcode)
|
|
|
|
def warn(s):
    """Write a 'warning:'-prefixed message to stderr."""
    sys.stderr.write('warning: %s\n' % s)
|
|
|
|
|
|
# Ingore Files ----------------------------------------------------------------
|
|
def compile_re(line):
    """Compile a regex string into a matcher predicate.

    Returns a function that is truthy for strings the regex matches
    anywhere.  If the regex fails to compile, a warning is emitted and a
    never-matching predicate is returned.
    """
    try:
        r = re.compile(line)
    except re.error:
        # Narrowed from a bare except: only regex-compilation failures
        # should be swallowed; anything else (KeyboardInterrupt, etc.)
        # must propagate.
        warn('could not compile regular expression "%s"' % line)
        return lambda s: False
    return lambda s: r.search(s)
|
|
|
|
def glob_to_re(glob):
    """Translate a shell-style glob into an (unanchored) regex string.

    Supports ? (any one char), * (any run not crossing '/'), ** (any run
    including '/'), [...] character classes copied through verbatim, and
    backslash-escaping of the following character.
    """
    pat = ''

    chs = list(glob)
    while chs:
        ch = chs.pop(0)
        if ch == '\\':
            # Escape the next character literally.  A lone trailing
            # backslash escapes nothing; previously this raised
            # IndexError on pop from an empty list.
            if chs:
                pat += re.escape(chs.pop(0))
        elif ch == '?':
            pat += '.'
        elif ch == '*':
            if chs and chs[0] == '*':
                # ** crosses directory boundaries.
                chs.pop(0)
                pat += '.*'
            else:
                # A single * stops at a slash.
                pat += '[^/]*'
        elif ch == '[':
            # Copy a [...] character class through unchanged.  A lone
            # trailing '[' previously raised IndexError.
            pat += '['
            if chs:
                ch = chs.pop(0)
                while chs and ch != ']':
                    pat += ch
                    ch = chs.pop(0)
            pat += ']'
        else:
            pat += re.escape(ch)

    return pat
|
|
|
|
def compile_literal(line):
    """Return a predicate testing whether line occurs as a substring."""
    needle = line

    def matcher(s):
        return needle in s

    return matcher
|
|
|
|
def compile_git(line):
    """Compile one .gitignore pattern line into a matcher predicate.

    Returns a function that is truthy for relative paths (as produced by
    this tool, i.e. starting with './') the pattern should ignore.
    """
    original_line = line
    pat = ''

    # Summarized from `man 5 gitignore` (the official wording is hard to
    # follow; these rules were confirmed by trial and error):
    #
    # 0. A pattern ending in a slash matches only directories; the slash
    #    can then be dropped for the rest of these rules.
    # 1. Patterns are shell globs, except * does not match '/' and there
    #    is no ** support.
    # 2. A pattern without a slash is matched against the basename of the
    #    path (the 'file.txt' in '/foo/bar/file.txt').
    # 3. A pattern with a slash is matched against the entire path.
    # 4. A match must cover the whole string it is searched against:
    #       'am'       will not ignore '/foo/bar/spam' (partial basename)
    #       'bar/spam' will not ignore '/foo/bar/spam' (partial path)
    # 5. A leading slash does not change the matching itself, but it does
    #    turn a "pattern with no slash" into a "pattern with a slash":
    #       'bar'  ignores '/foo/bar' (basename match at that level)
    #       '/bar' will not ignore '/foo/bar/spam' (whole-path match)

    if line.endswith('/'):
        # TODO: Deal with this (rule 0: only directories should match).
        # directories_only = True
        line = line[:-1]

    has_slash = '/' in line

    line = line.lstrip('/')

    if has_slash:
        # Patterns with a slash have to match against the entire pathname. So
        # they need to be rooted at the beginning (paths here start './').
        pat += '^./'
    else:
        # Patterns without a slash match against just the basename, which we'll
        # simulate by including the (final) divider in the pattern.
        pat += '/'

    # The rest of the pattern is git's variation on shell globs:
    # mostly normal shell globs, but there's no **.
    chs = list(line)
    while chs:
        ch = chs.pop(0)
        if ch == '?':
            pat += '.'
        elif ch == '*':
            # * never crosses a directory boundary (rule 1).
            pat += '[^/]*'
        elif ch == '[':
            # Copy a [...] character class through unchanged.
            pat += '['
            ch = chs.pop(0)
            while chs and ch != ']':
                pat += ch
                ch = chs.pop(0)
            pat += ']'
        else:
            pat += re.escape(ch)

    # Patterns always have to be anchored at the end (rule 4).
    pat += '$'

    try:
        return compile_re(pat)
    except:
        # compile_re handles bad regexes itself, so this is a last-resort
        # safety net; on failure, ignore everything the pattern might cover.
        warn("could not parse gitignore pattern '%s'" % original_line)
        return lambda s: True
|
|
|
|
def compile_hg_glob(line):
    """Compile a Mercurial-style ignore glob into a matcher predicate.

    The glob is translated to a regex that is quasi-rooted at a directory
    boundary (or the start of the path) and anchored at the end.  The
    returned predicate strips a leading './' before matching.  On a bad
    pattern it warns and returns an ignore-everything predicate.
    """
    # Mercurial ignore globs are quasi-rooted at directory boundaries or
    # the beginning of the pattern, and must match through to the end.
    pat = '(^|/)' + glob_to_re(line) + '$'

    try:
        regex = re.compile(pat)
    except re.error:
        # Narrowed from a bare except: only a malformed generated regex
        # should be swallowed here.
        warn("could not parse hgignore pattern '%s'" % line)
        return lambda s: True

    return lambda s: regex.search(s[2:] if s.startswith('./') else s)
|
|
|
|
def compile_ff_glob(line):
    """Compile an .ffignore glob into a matcher predicate."""
    try:
        return compile_re(glob_to_re(line))
    except:
        # Safety net: on any translation failure, ignore everything the
        # pattern might have covered.
        warn("could not parse ffignore pattern '%s'" % line)
        return lambda s: True
|
|
|
|
|
|
def parse_gitignore_file(path):
    """Parse a .gitignore file into a list of matcher predicates.

    A missing file yields an empty list.  Blank lines, comments, and
    (currently unsupported) '!' negation lines are skipped; every other
    line is compiled as a gitignore pattern.
    """
    if not os.path.isfile(path):
        return []

    ignorers = []
    with open(path) as f:
        # Iterate the file lazily instead of materializing readlines().
        for line in f:
            line = line.rstrip('\n')
            if GITIGNORE_BLANK_RE.match(line):
                continue
            elif GITIGNORE_COMMENT_RE.match(line):
                continue
            elif GITIGNORE_NEGATE_RE.match(line):
                # TODO: Support negated ('!') patterns.
                continue
            else:
                # This line is a gitignore pattern.
                ignorers.append(compile_git(line))

    return ignorers
|
|
|
|
def parse_hgignore_file(path):
    """Parse an .hgignore file into a list of matcher predicates.

    Honors 'syntax:' switch lines (glob / re / regexp).  Lines default to
    regex syntax, matching Mercurial's behavior.  A missing file yields an
    empty list.
    """
    if not os.path.isfile(path):
        return []

    syntax = IGNORE_SYNTAX_REGEX
    ignorers = []
    with open(path) as f:
        # Iterate lazily instead of materializing readlines().
        for line in f:
            line = line.rstrip('\n')
            if HGIGNORE_BLANK_RE.match(line):
                continue
            elif HGIGNORE_COMMENT_RE.match(line):
                continue

            # Match the syntax-switch regex once per line (it was
            # previously evaluated twice).
            m = HGIGNORE_SYNTAX_RE.match(line)
            if m:
                s = m.group(1).lower()
                if s == 'glob':
                    syntax = IGNORE_SYNTAX_GLOB
                elif s in ['re', 'regexp']:
                    syntax = IGNORE_SYNTAX_REGEX
            else:
                # This line is a pattern in the currently active syntax.
                if syntax == IGNORE_SYNTAX_REGEX:
                    ignorers.append(compile_re(line))
                elif syntax == IGNORE_SYNTAX_GLOB:
                    ignorers.append(compile_hg_glob(line))

    return ignorers
|
|
|
|
def parse_ffignore_file(path):
    """Parse an .ffignore file into a list of matcher predicates.

    Honors 'syntax:' switch lines (glob / re / regex / regexp / literal).
    Lines default to regex syntax.  A missing file yields an empty list.
    """
    if not os.path.isfile(path):
        return []

    syntax = IGNORE_SYNTAX_REGEX
    ignorers = []
    with open(path) as f:
        # Iterate lazily instead of materializing readlines().
        for line in f:
            line = line.rstrip('\n')
            if IGNORE_BLANK_RE.match(line):
                continue
            elif IGNORE_COMMENT_RE.match(line):
                continue

            # Match the syntax-switch regex once per line (it was
            # previously evaluated twice).
            m = IGNORE_SYNTAX_RE.match(line)
            if m:
                s = m.group(1).lower()
                if s == 'literal':
                    syntax = IGNORE_SYNTAX_LITERAL
                elif s == 'glob':
                    syntax = IGNORE_SYNTAX_GLOB
                elif s in ['re', 'regex', 'regexp']:
                    syntax = IGNORE_SYNTAX_REGEX
            else:
                # This line is a pattern in the currently active syntax.
                if syntax == IGNORE_SYNTAX_LITERAL:
                    ignorers.append(compile_literal(line))
                elif syntax == IGNORE_SYNTAX_REGEX:
                    ignorers.append(compile_re(line))
                elif syntax == IGNORE_SYNTAX_GLOB:
                    ignorers.append(compile_ff_glob(line))

    return ignorers
|
|
|
|
def parse_ignore_files(dir):
    """Collect ignorer predicates from the ignore files present in dir.

    Only the filenames enabled in options.ignore_files are consulted.
    """
    # Dispatch table instead of an if/elif chain.
    parsers = {
        '.ffignore': parse_ffignore_file,
        '.gitignore': parse_gitignore_file,
        '.hgignore': parse_hgignore_file,
    }

    ignorers = []
    for filename in options.ignore_files:
        parser = parsers.get(filename)
        if parser:
            ignorers.extend(parser(os.path.join(dir, filename)))
    return ignorers
|
|
|
|
|
|
def get_initial_ignorers():
    """Return ignorers from ~/.ffignore, when .ffignore parsing is enabled.

    Returns an empty list when .ffignore handling is disabled or $HOME is
    not set.
    """
    # Guard clauses instead of nested if/else.
    if '.ffignore' not in options.ignore_files:
        return []

    home = os.environ.get('HOME')
    if not home:
        return []

    return parse_ffignore_file(os.path.join(home, '.ffignore'))
|
|
|
|
|
|
# Searching! ------------------------------------------------------------------
|
|
def get_type(path):
    """Classify path as one of the four TYPE_* constants.

    Uses islink/isdir, so a symlink to a directory is TYPE_DIR_SYMLINK
    and a symlink to a file is TYPE_FILE_SYMLINK.
    """
    is_link = os.path.islink(path)
    is_dir = os.path.isdir(path)

    if is_dir:
        return TYPE_DIR_SYMLINK if is_link else TYPE_DIR_REAL
    return TYPE_FILE_SYMLINK if is_link else TYPE_FILE_REAL
|
|
|
|
def should_ignore(basename, path, ignorers):
    """Return True when path should be skipped entirely.

    A path is skipped when it is a VCS metadata directory (and VCS-dir
    skipping is on) or when any active ignorer matches it.
    """
    if options.ignore_vcs_dirs and basename in VCS_DIRS:
        return True

    return any(ignorer(path) for ignorer in ignorers)
|
|
|
|
def match(query, path, basename):
    """Decide whether path passes all active filters.

    Applies, in order: the --type filter, the PATTERN query (against the
    whole path with --entire, else the basename), size filters, mtime
    filters, and the binary-file filter.  The result is negated when
    --invert is active.
    """
    def _match():
        if options.type != TYPES_ALL:
            if get_type(path) not in options.type:
                return False

        if not query(path if options.entire else basename):
            return False

        # lstat so symlinks are judged by their own size/mtime.
        stat = os.lstat(path)
        if options.larger_than and stat.st_size < options.larger_than:
            return False

        if options.smaller_than and stat.st_size > options.smaller_than:
            return False

        if options.before and stat.st_mtime > options.before:
            return False

        if options.after and stat.st_mtime < options.after:
            return False

        if not options.binary:
            # We open in non-blocking mode so things like file-based sockets
            # don't hang while waiting for their full kb.
            # TODO: Ignore those altogether for the binary check?
            try:
                fd = os.open(path, os.O_NONBLOCK)
            except OSError:
                # Broken symlink, permissions, race with deletion: nothing
                # to read, so don't treat it as binary.  This used to crash.
                return True
            with os.fdopen(fd) as f:
                try:
                    if '\0' in f.read(1024):
                        return False
                except (IOError, OSError):
                    # e.g. path is a directory: reading the fd fails.  This
                    # used to abort the whole search; treat "can't read" as
                    # "not known binary".
                    pass

        return True

    result = _match()
    return not result if options.invert else result
|
|
|
|
|
|
def _search(query, dir, depth, ignorers):
    """Recursively search dir, printing matches as they are found.

    Ignore files found in dir extend the inherited ignorers for dir and
    everything below it.  Recursion stops at options.depth.
    """
    ignorers = ignorers + parse_ignore_files(dir)

    try:
        contents = os.listdir(dir)
    except OSError:
        # Unreadable directory (permissions, race with deletion): skip it
        # instead of aborting the entire search with a traceback.
        warn('could not list directory "%s"' % dir)
        return

    # Renamed from 'next', which shadowed the builtin.
    subdirs = []

    for item in contents:
        path = os.path.join(dir, item)
        if not should_ignore(item, path, ignorers):
            if match(query, path, item):
                out(path, '\0' if options.zero else '\n')

            if os.path.isdir(path):
                # Only descend into symlinked directories with --follow.
                if options.follow or not os.path.islink(path):
                    subdirs.append(path)

    if depth < options.depth:
        for d in subdirs:
            _search(query, d, depth + 1, ignorers)
|
|
|
|
def search(query, dir='.', depth=0, ignorers=None):
    """Kick off a search rooted at dir.

    Previously the dir, depth, and ignorers arguments were accepted but
    silently ignored ('.', 0, and fresh initial ignorers were always
    used); they are now honored, with the old values as defaults.
    """
    if ignorers is None:
        ignorers = get_initial_ignorers()
    _search(query, dir, depth, ignorers)
|
|
|
|
|
|
# Option Parsing and Main -----------------------------------------------------
|
|
def build_option_parser():
    """Build and return the OptionParser for the command line.

    Only constructs the parser; all post-processing of parsed values
    (int conversion, size/time parsing, type-set expansion) happens in
    main().
    """
    p = OptionParser("usage: %prog [options] PATTERN")

    # Main options
    p.add_option('-d', '--dir', default='.',
                 help='root the search in DIR (default .)',
                 metavar='DIR')
    p.add_option('-D', '--depth', default='25',
                 help='search at most N directories deep (default 25)',
                 metavar='N')
    p.add_option('-f', '--follow',
                 action='store_true', default=False,
                 help='follow symlinked directories and search their contents')
    p.add_option('-F', '--no-follow',
                 dest='follow', action='store_false',
                 help="don't follow symlinked directories (default)")
    p.add_option('-0', '--print0', dest='zero',
                 action='store_true', default=False,
                 help='separate matches with a null byte in output')
    p.add_option('-l', '--literal',
                 action='store_true', default=False,
                 help='force literal search, even if it looks like a regex')
    p.add_option('-v', '--invert',
                 action='store_true', default=False,
                 help='invert match')
    p.add_option('-e', '--entire',
                 action='store_true', default=False,
                 help='match PATTERN against the entire path string')
    p.add_option('-E', '--non-entire', dest='entire',
                 action='store_false',
                 help='match PATTERN against only the filenames (default)')

    # Case sensitivity
    g = OptionGroup(p, "Configuring Case Sensitivity")
    g.add_option('-s', '--case-sensitive',
                 dest='case', action='store_const', const=CASE_SENSITIVE,
                 default=CASE_SENSITIVE,
                 help='case sensitive matching (default)')
    g.add_option('-i', '--case-insensitive',
                 dest='case', action='store_const', const=CASE_INSENSITIVE,
                 help='case insensitive matching')
    g.add_option('-S', '--case-smart',
                 dest='case', action='store_const', const=CASE_SMART,
                 help='smart case matching (sensitive if any uppercase chars '
                      'are in the pattern, insensitive otherwise)')
    p.add_option_group(g)

    # Ignoring
    g = OptionGroup(p, "Configuring Ignoring")

    g.add_option('-b', '--binary',
                 dest='binary', action='store_true', default=True,
                 help="allow binary files (default)")

    g.add_option('-B', '--no-binary',
                 dest='binary', action='store_false',
                 help='ignore binary files')

    g.add_option('-r', '--restricted', dest='ignore_mode',
                 action='store_const', const=IGNORE_MODE_RESTRICTED,
                 default=IGNORE_MODE_RESTRICTED,
                 help="restricted search (skip VCS directories, "
                      "parse all ignore files) (default)")

    g.add_option('-q', '--semi-restricted', dest='ignore_mode',
                 action='store_const', const=IGNORE_MODE_SEMI,
                 help="semi-restricted search (don't parse VCS ignore files, "
                      "but still skip VCS directories and parse .ffignore)")

    g.add_option('-u', '--unrestricted', dest='ignore_mode',
                 action='store_const', const=IGNORE_MODE_UNRESTRICTED,
                 help="unrestricted search (don't parse ignore files, but "
                      "still skip VCS directories)")

    g.add_option('-a', '--all', dest='ignore_mode',
                 action='store_const', const=IGNORE_MODE_ALL,
                 help="don't ignore anything (ALL files can match)")

    g.add_option('-I', '--ignore', metavar='PATTERN',
                 action='append',
                 help="add a pattern to be ignored (can be given multiple times)")

    p.add_option_group(g)

    # Time filtering
    # NOTE(review): this group is never passed to p.add_option_group (see the
    # TODO below), so its options are hidden from --help.  They still parse,
    # presumably because optparse groups share the parser's option table --
    # confirm before relying on it.
    g = OptionGroup(p, "Time Filtering")
    g.add_option('--before',
                 help='match files modified < TIME',
                 metavar='TIME')
    g.add_option('--after',
                 help='match files modified > TIME',
                 metavar='TIME')
    g.add_option('--until',
                 help='match files modified <= TIME',
                 metavar='TIME')
    g.add_option('--since',
                 help='match files modified >= TIME',
                 metavar='TIME')
    g.add_option('--at',
                 help='match files modified at TIME',
                 metavar='TIME')
    g.add_option('--created-before',
                 help='match files created < TIME',
                 metavar='TIME')
    g.add_option('--created-after',
                 help='match files created > TIME',
                 metavar='TIME')
    g.add_option('--created-until',
                 help='match files created <= TIME',
                 metavar='TIME')
    g.add_option('--created-since',
                 help='match files created >= TIME',
                 metavar='TIME')
    g.add_option('--created-at',
                 help='match files created at TIME',
                 metavar='TIME')
    # TODO
    # p.add_option_group(g)

    # Size filtering
    g = OptionGroup(p, "Size Filtering",
        "Sizes can be given as a number followed by a prefix. Some examples: "
        "1k, 5kb, 1.5gb, 2g, 1024b")
    g.add_option('--larger-than',
                 help='match files larger than SIZE (inclusive)',
                 metavar='SIZE')
    # Hidden alias for --larger-than.
    g.add_option('--bigger-than', dest='larger_than',
                 help=optparse.SUPPRESS_HELP)
    g.add_option('--smaller-than',
                 help='match files smaller than SIZE (inclusive)',
                 metavar='SIZE')
    p.add_option_group(g)

    # Type filtering
    g = OptionGroup(p, "Type Filtering",
        "Possible types are "
        "a (all), "
        "f (files), "
        "d (dirs), "
        "r (real), "
        "s (symlinked), "
        "e (real files), "
        "c (real dirs), "
        "x (symlinked files), "
        "y (symlinked dirs). "
        "If multiple types are given they will be unioned together: "
        "--type 'es' would match real files and all symlinks.")
    # Default is False (falsy), which build_type_set expands to TYPES_ALL.
    g.add_option('-t', '--type',
                 action='store', default=False, metavar='TYPE(S)',
                 help='match only specific types of things (files, dirs, non-symlinks, symlinks)')
    p.add_option_group(g)

    return p
|
|
|
|
def build_type_set(types):
    """Expand a --type string (e.g. 'es') into a set of TYPE_* constants.

    Each character unions in one class of filesystem objects.  A falsy
    spec (the option default) means all types.  Unknown characters now
    abort with a friendly error instead of a raw KeyError traceback.
    """
    if not types:
        return TYPES_ALL

    # Built once, outside the loop (it was previously rebuilt per char).
    table = {
        'a': TYPES_ALL,

        'e': TYPES_FILE_REAL,
        'x': TYPES_FILE_SYMLINK,
        'c': TYPES_DIR_REAL,
        'y': TYPES_DIR_SYMLINK,

        'f': TYPES_FILE,
        'd': TYPES_DIR,

        'r': TYPES_REAL,
        's': TYPES_SYMLINK,
    }

    result = set()
    for c in types:
        try:
            result = result | table[c.lower()]
        except KeyError:
            die('unknown type "%s"' % c)

    return result
|
|
|
|
def parse_size(size):
    """Parse a human-readable size ('1.5gb', '2k', '1024b') into bytes.

    Returns None for an empty/absent spec; dies on an invalid one.
    """
    size = size.replace(' ', '') if size else size

    if not size:
        return None

    m = SIZE_RE.match(size)
    if not m:
        die('invalid size "%s"' % size)

    n, unit = m.groups()

    try:
        n = float(n)
    except ValueError:
        die('invalid size "%s"' % size)

    # SIZE_RE matches case-insensitively, so normalize the unit letter
    # before the lookup ('5K' previously raised a raw KeyError).
    unit = {
        'b': BYTE,
        'k': KILOBYTE,
        'm': MEGABYTE,
        'g': GIGABYTE,
        't': TERABYTE,
        'p': PETABYTE,
    }[(unit or 'b').lower()]

    return int(n * unit)
|
|
|
|
def is_re(s):
    """Try to guess if the string is a regex.

    Err on the side of "True", because treating a literal like a regex only
    slows you down a bit, but the other way around is broken behaviour.
    """
    # string.ascii_letters replaces the Python-2-only, locale-dependent
    # string.letters; same result under the default locale, and it also
    # exists on Python 3.
    plain = string.ascii_letters + '_-'
    return not all(c.lower() in plain for c in s)
|
|
|
|
|
|
def clean_ago_piece(n, unit):
    """Normalize one (number, unit-word) pair to (float, seconds-per-unit).

    An unrecognized unit word is passed through unchanged (AGO_RE only
    produces the words listed here).
    """
    conversions = [
        (['s', 'sec', 'secs', 'second', 'seconds'], SECOND),
        (['m', 'min', 'mins', 'minute', 'minutes'], MINUTE),
        (['h', 'hour', 'hours'], HOUR),
        (['d', 'day', 'days'], DAY),
        (['w', 'week', 'weeks'], WEEK),
        (['mo', 'mos', 'month', 'months'], MONTH),
        (['y', 'year', 'years'], YEAR),
    ]

    for names, seconds in conversions:
        if unit in names:
            unit = seconds
            break

    return float(n), unit
|
|
|
|
def parse_ago(start_time, timestr):
    """Subtract each 'N unit' piece in timestr from start_time.

    start_time is in epoch seconds.  Dies if the same unit appears twice.
    """
    seen_units = set()
    result = start_time

    for raw_n, raw_unit in AGO_RE.findall(timestr):
        n, unit = clean_ago_piece(raw_n, raw_unit)

        if unit in seen_units:
            die('duplicate "%s" in time specification' % unit)
        seen_units.add(unit)

        result -= n * unit

    return int(result)
|
|
|
|
def parse_time(timestr):
    """Parse a time string into seconds past the epoch.

    (The previous docstring said milliseconds; time.time() here is in
    seconds.)  Only relative '2 weeks 3 days'-style strings are currently
    understood; anything else yields None, which callers treat as "no
    filter".
    """
    start_time = int(time.time())

    timestr = timestr.strip().lower()

    # A relative offset like '1h 30m': subtract it from now.
    if AGO_RE.match(timestr):
        return parse_ago(start_time, timestr)

    # TODO: absolute timestamps are not supported.
    return None
|
|
|
|
|
|
def main():
    """Parse the command line, normalize the options global, run the search."""
    global options

    (options, args) = build_option_parser().parse_args()

    # PATTERN
    if len(args) > 1:
        # die() exits itself; the unreachable sys.exit(1) that followed
        # it has been removed.
        die("only one search pattern can be given")

    query = args[0] if args else ''

    # --dir
    if options.dir:
        try:
            os.chdir(options.dir)
        except OSError:
            die('could not change to directory "%s"' % options.dir)

    # --depth
    try:
        options.depth = int(options.depth)
    except ValueError:
        die('depth must be a non-negative integer (got "%s")' % options.depth)
    if options.depth < 0:
        # The message promises non-negative; actually enforce it.
        die('depth must be a non-negative integer (got "%s")' % options.depth)

    # --case-*
    if options.case == CASE_SMART:
        # string.ascii_uppercase replaces the Python-2-only string.uppercase.
        if any(c in string.ascii_uppercase for c in query):
            options.case = CASE_SENSITIVE
        else:
            options.case = CASE_INSENSITIVE

    # --type
    options.type = build_type_set(options.type)

    # --larger-than, --smaller-than
    options.larger_than = parse_size(options.larger_than)
    options.smaller_than = parse_size(options.smaller_than)

    if options.larger_than or options.smaller_than:
        # Directory sizes are not supported.
        options.type = options.type - TYPES_DIR

    # time filtering
    if options.before:
        options.before = parse_time(options.before)

    if options.after:
        options.after = parse_time(options.after)

    # Ignore files
    if options.ignore_mode == IGNORE_MODE_RESTRICTED:
        options.ignore_files = ['.ffignore', '.gitignore', '.hgignore']
        options.ignore_vcs_dirs = True
    elif options.ignore_mode == IGNORE_MODE_SEMI:
        options.ignore_files = ['.ffignore']
        options.ignore_vcs_dirs = True
    elif options.ignore_mode == IGNORE_MODE_UNRESTRICTED:
        options.ignore_files = []
        options.ignore_vcs_dirs = True
    elif options.ignore_mode == IGNORE_MODE_ALL:
        options.ignore_files = []
        options.ignore_vcs_dirs = False

    # Build the query matcher.
    if options.literal or not is_re(query):
        # Plain substring search, case-folded when insensitive.
        if options.case == CASE_SENSITIVE:
            literal = query
            query = lambda s: literal in s
        else:
            literal = query.lower()
            query = lambda s: literal in s.lower()
    else:
        # Regex search.
        if options.case == CASE_SENSITIVE:
            r = re.compile(query)
        else:
            r = re.compile(query, re.IGNORECASE)
        query = lambda s: r.search(s)

    # Go!
    search(query)
|
|
|
|
|
|
if __name__ == '__main__':
    import signal

    # On Ctrl-C, exit quietly with status 130 (the conventional SIGINT
    # exit code) instead of printing a KeyboardInterrupt traceback.
    def sigint_handler(signal, frame):
        sys.stdout.write('\n')
        sys.exit(130)
    signal.signal(signal.SIGINT, sigint_handler)
    main()
|