Recursive directory list/analyze function doesn't seem to recurse right
I wrote what I thought was a straightforward Python script to traverse a given directory and tabulate all the file suffixes it finds. The output looks like this:
OTUS-ASIO:face fish$ sufs
>>> /Users/fish/Dropbox/ost2/face (total 194)
=== 1 1 -
=== css 16 -----
=== gif 14 -----
=== html 12 ----
=== icc 87 --------------------------
=== jpg 3 -
=== js 46 --------------
=== png 3 -
=== zip 2 -
... which would be great, if those values were correct. They are not. Here's what happens when I run it in a subdirectory of the directory I listed above:
OTUS-ASIO:face fish$ cd images/
OTUS-ASIO:images fish$ sufs
>>> /Users/fish/Dropbox/ost2/face/images (total 1016)
=== JPG 3 -
=== gif 17 -
=== ico 1 -
=== jpeg 1 -
=== jpg 901 --------------------------
=== png 87 ---
... It only seems to go one directory level down. Running the script one level up didn't pick up on the 'jpeg' suffix at all, and seemed to miss a good 898 jpg files.
The script in question is here:
#!/usr/bin/env python
# encoding: utf-8
"""
getfilesuffi开发者_开发问答xes.py
Created by FI$H 2000 on 2010-10-15.
Copyright (c) 2010 OST, LLC. All rights reserved.
"""
import sys, os, getopt
help_message = '''
Prints a list of all the file suffixes found in each DIR, with counts.
Defaults to the current directory wth no args.
$ %s DIR [DIR DIR etc ...]
''' % os.path.basename(__file__)
dirs = dict()
skips = ('DS_Store','hgignore')
class Usage(Exception):
def __init__(self, msg):
self.msg = msg
def getmesomesuffixes(rootdir, thisdir=None):
if not thisdir:
thisdir = rootdir
for thing in [os.path.abspath(h) for h in os.listdir(thisdir)]:
if os.path.isdir(thing):
getmesomesuffixes(rootdir), thing)
else:
if thing.rfind('.') > -1:
suf = thing.rsplit('.').pop()
dirs[rootdir][suf] = dirs[rootdir].get(suf, 0) + 1
return
def main(argv=None):
if argv is None:
argv = sys.argv
try:
try:
opts, args = getopt.getopt(argv[1:], "h", ["help",])
except getopt.error, msg:
raise Usage(msg)
for option, value in opts:
if option == "-v":
verbose = True
if option in ("-h", "--help"):
raise Usage(help_message)
if len(args) == 0:
args.append(os.getcwd())
for durr in [os.path.abspath(arg) for arg in args]:
if os.path.isdir(durr):
dirs[durr] = dict()
for k, v in dirs.items():
getmesomesuffixes(k)
print ""
for k, v in dirs.items():
sufs = v.items()
sufs.sort()
maxcount = reduce(lambda fs, ns: fs > ns and fs or ns, map(lambda t: t[1], sufs), 1)
mincount = reduce(lambda fs, ns: fs < ns and fs or ns, map(lambda t: t[1], sufs), 1)
total = reduce(lambda fs, ns: fs + ns, map(lambda t: t[1], sufs), 0)
print ">>>\t\t\t%s (total %s)" % (k, total)
for suf, sufcount in sufs:
try:
skips.index(suf)
except ValueError:
print "===\t\t\t%12s\t %3s\t %s" % (suf, sufcount, "-" * (int(float(float(sufcount) / float(maxcount)) * 25) + 1))
print ""
except Usage, err:
print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
print >> sys.stderr, "\t for help use --help"
return 2
if __name__ == "__main__":
sys.exit(main())
It seems that getmesomesuffixes()
is subtly not doing what I want it to. I hate to ask such an annoying question, but if anyone can spot whatever amateur-hour error I am making with a quick once-over, it would save me some serious frustration.
Yeah, Won't you be better off if you used os.walk
for root, dirs, files in os.walk(basedir):
... do you stuff ..
See the example at
- http://docs.python.org/library/os.html
Also look at os.path.splitext(path), a finer way to find the type of your file.
>>> os.path.splitext('/d/c/as.jpeg')
('/d/c/as', '.jpeg')
>>>
Both of these together should simplify your code.
import os
import os.path
from collections import defaultdict
def foo(dir='.'):
d = defaultdict(int)
for _, _, files in os.walk(dir):
for f in files:
d[os.path.splitext(f)[1]] += 1
return d
if __name__ == '__main__':
d = foo()
for k, v in sorted(d.items()):
print k, v
精彩评论