开发者

How to traverse through the files in a directory?

I have a directory logfiles. I want to process each file inside this directory using a Python script.

for file in directory:
      # do something

How do I do this?


With os.listdir() or os.walk(), depending on whether you want to do it recursively.


In Python 2, you can try something like:

import os.path

# Callback for os.path.walk (Python 2 only): prints the directory name and
# the list of names it is called with.
#   x        -- the 3rd argument given to os.path.walk (0 in the call below)
#   dir_name -- printed first
#   files    -- printed second
def print_it(x, dir_name, files):
    print dir_name
    print files

# Walk your_dir with print_it as the callback; the trailing 0 is the
# arbitrary value that is handed to the callback as its first argument.
os.path.walk(your_dir, print_it, 0)

Note: the 3rd argument of os.path.walk is whatever you want. You'll get it as the 1st arg of the callback.

In Python 3 os.path.walk has been removed; use os.walk instead. Instead of taking a callback, you just pass it a directory and it yields (dirpath, dirnames, filenames) triples. So a rough equivalent of the above becomes

import os

# Walk the tree rooted at your_dir; os.walk yields one
# (dirpath, dirnames, filenames) triple per directory it visits.
for dirpath, dirnames, filenames in os.walk(your_dir):
    # print is a function in Python 3, so the calls need parentheses.
    print(dirpath)
    print(dirnames)
    print(filenames)


You can list every file from a directory recursively like this.

from os import listdir
from os.path import isfile, join, isdir

def getAllFilesRecursive(root):
    """Return the paths of all regular files found under ``root``, recursively.

    Every returned path is ``root`` joined with the file's location below
    it, so a relative ``root`` yields relative paths and an absolute
    ``root`` yields absolute paths.  Files located directly in ``root``
    come first, followed by the files of each sub-directory in ``listdir``
    order.

    :param root: directory to explore
    :return: list of file paths
    :raises OSError: if ``root`` or one of its sub-directories cannot be listed
    """
    files = []
    dirs = []
    # List the directory once and classify every entry.
    for name in listdir(root):
        path = join(root, name)
        if isfile(path):
            files.append(path)
        elif isdir(path):
            dirs.append(path)
    for d in dirs:
        # The recursive call already returns paths that start with ``root``;
        # joining them with ``root`` a second time duplicated the prefix
        # whenever ``root`` was a relative path.
        files.extend(getAllFilesRecursive(d))
    return files


import os
# location of directory you want to scan
loc = '/home/sahil/Documents'
# module-level dictionary used to store all results
# (a ``global`` statement only has an effect inside a function, so none is
# needed to define the name here)
k1 = {}

def scan(element, loc):
    """Recursively record the contents of the sub-directories of ``loc``.

    For every name in ``element`` that can be listed as a directory below
    ``loc``, the listing is printed, stored in the module-level dictionary
    ``k1`` under the key ``loc + '/' + name``, and then scanned in turn.

    :param element: names of the entries found inside ``loc``
    :param loc: path of the directory that contains those entries
    :return: the shared ``k1`` dictionary (directory path -> listing)
    """
    for name in element:
        temp = loc + '/' + name
        try:
            second_list = os.listdir(temp)
        except OSError:
            # Entries that cannot be listed (plain files, unreadable
            # directories) are skipped on purpose.
            continue
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(".....")
        print("Directory %s " % (temp))
        print(" ")
        print(second_list)
        k1[temp] = second_list
        scan(second_list, temp)

    return k1  # return the dictionary element


# initial steps
# List the top-level directory.  When it cannot be read, report the error
# and fall back to an empty listing so that the scan below still runs
# (finding nothing) instead of failing with a NameError on initial_list.
try:
    initial_list = os.listdir(loc)
except OSError:
    print("error")
    initial_list = []
else:
    print(initial_list)


# Scan every sub-directory and show the collected result.
k = scan(initial_list, loc)
print(" ...................................................................................")
print(k)

I wrote this code as a directory scanner for the playlist feature of my audio player; it recursively scans all the subdirectories of the given directory.


You could try glob:

import glob

for file in glob.glob('log-*-*.txt'):
  # Etc.

But glob doesn't work recursively by default (since Python 3.5 you can pass recursive=True and use a ** pattern), so if your logs are in folders inside of that directory, you'd be better off looking at what Ignacio Vazquez-Abrams posted.


If you need to check for multiple file types, use

glob.glob("*.jpg") + glob.glob("*.png")

Glob doesn't care about the ordering of the files in the list. If you need files sorted by filename, use

sorted(glob.glob("*.jpg"))


import os
# Directory the walk starts from.
rootDir = '.'
# Report each directory reached by os.walk together with its files.
for current_dir, child_dirs, file_names in os.walk(rootDir):
    print('Found directory: %s' % current_dir)
    for file_name in file_names:
        print('\t%s' % file_name)
    # Drop the first entry of the sub-directory list; slicing makes this
    # a no-op when the list is empty.
    del child_dirs[:1]


Here's my version of the recursive file walker, based on the answer of Matheus Araujo, that can take optional exclusion list arguments, which happens to be very helpful when dealing with tree copies where some directories / files / file extensions aren't wanted.

import os

def get_files_recursive(root, d_exclude_list=None, f_exclude_list=None, ext_exclude_list=None, primary_root=None):
    """
    Walk a path to recursively find files
    Modified version of https://stackoverflow.com/a/24771959/2635443 that includes exclusion lists
    :param root: path to explore
    :param d_exclude_list: list of root relative directories paths to exclude (None or empty: exclude nothing)
    :param f_exclude_list: list of filenames without paths to exclude (None or empty: exclude nothing)
    :param ext_exclude_list: list of file extensions to exclude, ex: ['.log', '.bak']
    :param primary_root: Only used for internal recursive exclusion lookup, don't pass an argument here
    :return: list of files found in path, each one prefixed with root
    """
    # Make sure we use a valid os separator for exclusion lists, this is done recursively :(
    d_exclude_list = [os.path.normpath(d) for d in d_exclude_list or []]
    f_exclude_list = f_exclude_list or []
    ext_exclude_list = ext_exclude_list or []

    files = []
    dirs = []
    # List the directory once and classify every entry.
    for entry in os.listdir(root):
        path = os.path.join(root, entry)
        if os.path.isfile(path):
            if entry not in f_exclude_list and os.path.splitext(entry)[1] not in ext_exclude_list:
                files.append(path)
        elif os.path.isdir(path):
            dirs.append(entry)

    for d in dirs:
        # Path of the sub-directory relative to the directory of the initial call,
        # which is what d_exclude_list entries are compared against.
        p_root = os.path.join(primary_root, d) if primary_root is not None else d
        if p_root not in d_exclude_list:
            # The recursive call already returns paths that start with root; joining
            # them with root again duplicated the prefix for relative roots.
            files.extend(get_files_recursive(os.path.join(root, d), d_exclude_list, f_exclude_list,
                                             ext_exclude_list, primary_root=p_root))
    return files


This is an update of my last version that accepts glob style wildcards in exclude lists. The function basically walks into every subdirectory of the given path and returns the list of all files from those directories, as relative paths. Function works like Matheus' answer, and may use optional exclude lists.

Eg:

files = get_files_recursive('/some/path')
files = get_files_recursive('/some/path', f_exclude_list=['.cache', '*.bak'])
files = get_files_recursive('C:\\Users', d_exclude_list=['AppData', 'Temp'])
files = get_files_recursive('/some/path', ext_exclude_list=['.log', '.db'])

Hope this helps someone like the initial answer of this thread helped me :)

import os
from fnmatch import fnmatch

def glob_path_match(path, pattern_list):
    """
    Checks if path matches at least one glob style wildcard pattern of a list
    :param path: path of file / directory
    :param pattern_list: list of wildcard patterns to check for
    :return: Boolean, False when pattern_list is empty
    """
    return any(fnmatch(path, pattern) for pattern in pattern_list)


def get_files_recursive(root, d_exclude_list=None, f_exclude_list=None, ext_exclude_list=None, primary_root=None):
    """
    Walk a path to recursively find files
    Modified version of https://stackoverflow.com/a/24771959/2635443 that includes exclusion lists
    and accepts glob style wildcards on files and directories
    :param root: path to explore
    :param d_exclude_list: list of root relative directories paths to exclude
    :param f_exclude_list: list of filenames without paths to exclude
    :param ext_exclude_list: list of file extensions to exclude, ex: ['.log', '.bak']
    :param primary_root: Only used for internal recursive exclusion lookup, don't pass an argument here
    :return: list of files found in path, each one prefixed with root
    """

    if d_exclude_list is not None:
        # Make sure we use a valid os separator for exclusion lists, this is done recursively :(
        d_exclude_list = [os.path.normpath(d) for d in d_exclude_list]
    else:
        d_exclude_list = []
    if f_exclude_list is None:
        f_exclude_list = []
    if ext_exclude_list is None:
        ext_exclude_list = []

    files = []
    dirs = []
    # List the directory once and classify every entry.
    for entry in os.listdir(root):
        path = os.path.join(root, entry)
        if os.path.isfile(path):
            if not glob_path_match(entry, f_exclude_list) and os.path.splitext(entry)[1] not in ext_exclude_list:
                files.append(path)
        elif os.path.isdir(path):
            dirs.append(entry)

    for d in dirs:
        # Path of the sub-directory relative to the directory of the initial call,
        # which is what the d_exclude_list patterns are matched against.
        p_root = os.path.join(primary_root, d) if primary_root is not None else d
        if not glob_path_match(p_root, d_exclude_list):
            # The recursive call already returns paths that start with root; joining
            # them with root again duplicated the prefix for relative roots.
            files.extend(get_files_recursive(os.path.join(root, d), d_exclude_list, f_exclude_list,
                                             ext_exclude_list, primary_root=p_root))
    return files
0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜