Python tarfile progress output?
I'm using the following code to extract a tar file:
import tarfile
# Extract every member of the archive into the current directory.
# The context manager closes the archive even if extractall() raises.
with tarfile.open("sample.tar.gz") as tar:
    tar.extractall()
However, I'd like to keep tabs on the progress in the form of which files are being extracted at the moment. How can I do this?
EXTRA BONUS POINTS: is it possible to create a percentage of the extraction process as well? I'd like to use that for tkinter to update a progress bar. Thanks!
Both file-progress and global progress:
import io
import os
import tarfile
def get_file_progress_file_object_class(on_progress):
    """Build an ``ExFileObject`` subclass that reports per-member read progress.

    Args:
        on_progress: Callable invoked as ``on_progress(name, position, size)``
            immediately before each ``read`` call, where ``name`` and ``size``
            come from the member's ``TarInfo`` and ``position`` is the current
            offset inside that member.

    Returns:
        The generated class; it takes the same ``(tarfile, tarinfo)``
        constructor arguments as ``tarfile.ExFileObject``.
    """

    class FileProgressFileObject(tarfile.ExFileObject):
        def __init__(self, tarfile_obj, tarinfo):
            super().__init__(tarfile_obj, tarinfo)
            # On Python 3 ExFileObject is an io.BufferedReader and has no
            # ``position`` / ``size`` attributes, so remember the member's
            # metadata here instead of reading it off ``self`` later.
            self._member_name = tarinfo.name
            self._member_size = tarinfo.size

        def read(self, size=-1, *args):
            # Report *before* reading, so the first callback sees position 0.
            on_progress(self._member_name, self.tell(), self._member_size)
            return super().read(size, *args)

    # NOTE(review): on Python 3, extractall() appears to copy member data
    # straight from the archive's file object rather than through
    # TarFile.fileobject, so this hook would fire only for extractfile() --
    # confirm against the tarfile version in use.
    return FileProgressFileObject
class TestFileProgressFileObject(tarfile.ExFileObject):
    """``ExFileObject`` variant that reports progress to the module-level ``on_progress``.

    Before every ``read`` it calls ``on_progress(name, position, size)`` with
    the member's name, the current offset inside the member, and the member's
    total size.
    """

    def __init__(self, tarfile_obj, tarinfo):
        super().__init__(tarfile_obj, tarinfo)
        # On Python 3 ExFileObject is an io.BufferedReader and has no
        # ``position`` / ``size`` attributes; keep the TarInfo metadata instead.
        self._member_name = tarinfo.name
        self._member_size = tarinfo.size

    def read(self, size=-1, *args):
        # ``on_progress`` is looked up at call time, so it only has to be
        # defined before the first read, not before this class statement.
        on_progress(self._member_name, self.tell(), self._member_size)
        return super().read(size, *args)
class ProgressFileObject(io.FileIO):
    """Raw file object that prints overall read progress on every ``read``.

    The total is the on-disk size of ``path`` taken at construction time, so
    when this wraps a compressed archive the figures refer to compressed bytes.
    """

    def __init__(self, path, *args, **kwargs):
        """Open ``path`` like ``io.FileIO`` and record its size in bytes.

        Raises:
            OSError: if ``path`` cannot be stat'ed or opened.
        """
        self._total_size = os.path.getsize(path)
        super().__init__(path, *args, **kwargs)

    def read(self, size=-1):
        """Print ``<offset> of <total>`` and then read up to ``size`` bytes.

        ``size`` defaults to -1 (read to EOF), matching ``io.FileIO.read``, so
        callers that invoke ``read()`` with no argument keep working.
        """
        print("Overall process: %d of %d" % (self.tell(), self._total_size))
        return super().read(size)
def on_progress(filename, position, total_size):
    """Print one progress line of the form ``<filename>: <position> of <total_size>``."""
    report = "%s: %d of %s" % (filename, position, total_size)
    print(report)
# Monkey-patch the class tarfile uses for member file objects so member reads
# are reported through on_progress.
tarfile.TarFile.fileobject = get_file_progress_file_object_class(on_progress)
# tarfile does not close a caller-supplied fileobj, so both the raw archive
# file and the TarFile are managed here and released even if extraction fails.
with ProgressFileObject("a.tgz") as archive, tarfile.open(fileobj=archive) as tar:
    tar.extractall()
You can just use tqdm()
and print the progress of the number of files being extracted:
import tarfile
from tqdm import tqdm
# Open the archive; `path` must already hold the location of your tar.gz file.
with tarfile.open(name=path) as tar:
    # Fetch the member list once and let tqdm count extracted entries against it.
    entries = tar.getmembers()
    for entry in tqdm(iterable=entries, total=len(entries)):
        # Extract this entry into the current working directory.
        tar.extract(member=entry)
You can specify the members
parameter in extractall()
# NOTE: <path> and <some path> are placeholders, not valid Python; substitute
# the archive location and the destination directory before running.
# track_progress(tarball) is passed as `members`, so extractall() takes the
# entries to extract from that generator.
with tarfile.open(<path>, 'r') as tarball:
    tarball.extractall(path=<some path>, members = track_progress(tarball))
def track_progress(members):
    """Yield every item of ``members`` unchanged, one at a time.

    Intended as the ``members=`` argument of ``extractall()``: the loop body
    is the place to report on each entry just before it is handed over.
    """
    for entry in members:
        # `entry` is the item being handed to the consumer right now
        yield entry
Each member is a TarInfo object; see the tarfile.TarInfo documentation for all available functions and properties.
You could use extract
instead of extractall
- you would be able to print the member names as they are being extracted. To get a list of members, you could use getmembers.
A textual progressbar library can be found here:
- http://code.google.com/p/python-progressbar/
Tkinter snippet:
- http://tkinter.unpythonic.net/wiki/ProgressBar
There's a cool solution here that overrides the tarfile module as a drop-in replacement and lets you specify a callback to update.
https://github.com/thomaspurchas/tarfile-Progress-Reporter/
updated based on comment
To see which file is currently being extracted, the following worked for me:
import tarfile
# print() with a single string argument behaves the same on Python 2 and 3,
# whereas the bare `print "..."` statement is a SyntaxError on Python 3.
print("Extracting the contents of sample.tar.gz:")
# The context manager closes the archive even if an extraction fails.
with tarfile.open("sample.tar.gz") as tar:
    for member_info in tar.getmembers():
        # Announce each member before extracting it.
        print("- extracting: " + member_info.name)
        tar.extract(member_info)
This is what I use, without monkey patching or needing the number of entries.
def iter_tar_files(f):
    """Yield ``(path, content)`` for each file in a gzip-compressed tar archive.

    After each member is handled, a progress line is printed based on how far
    the underlying compressed file has been read.

    Args:
        f: Filesystem path of the ``.tar.gz`` archive.

    Yields:
        Tuples of ``(member.path, bytes)``; members for which ``extractfile``
        returns ``None`` (e.g. directories) are not yielded.

    Raises:
        OSError: if the archive cannot be stat'ed or opened.
        tarfile.TarError: if the file is not a readable gzip'd tar archive.
    """
    total_bytes = os.stat(f).st_size
    with open(f, "rb") as file_obj, \
            tarfile.open(fileobj=file_obj, mode="r:gz") as tar:
        # Iterate the TarFile itself instead of calling getmembers(), which
        # returns the full member list and so has to read every header in the
        # archive before the first member can be yielded.
        for member in tar:
            extracted = tar.extractfile(member)
            if extracted is not None:
                # Close the per-member file object once its data is read.
                with extracted:
                    content = extracted.read()
                yield member.path, content
            # This prints something like: 512/1024 = 50.00%
            offset = file_obj.tell()
            print(f"{offset} / {total_bytes} = {offset/total_bytes*100:.2f}%")
精彩评论