Wordlist generator: how do I split the output into multiple files by size?
I am trying to get this Python script to create a new file and continue generating word combinations there once the current file reaches a certain size.
# Single output file that receives every generated word, one per line.
f=open('wordlist', 'w')
def xselections(items, n):
    """Yield every length-``n`` selection of ``items``, repetition allowed.

    Each selection is produced as a new list. Selections come out in the
    order of ``items``, with the last position varying fastest. For
    ``n == 0`` the single empty selection ``[]`` is yielded.

    ``items`` is iterated once per recursion level, so it must be a
    re-iterable sequence (list, tuple, string), not a one-shot iterator.
    """
    if n == 0:
        yield []
    else:
        # Iterate the items directly rather than by index; this also avoids
        # the Python 2-only ``xrange``.
        for item in items:
            for ss in xselections(items, n - 1):
                yield [item] + ss
# ASCII code ranges for each character class:
# Numbers = 48 - 57
# Capital = 65 - 90
# Lower = 97 - 122
numb = range(48, 58)
cap = range(65, 91)
low = range(97, 123)

# Menu option -> the character classes that make up the alphabet.
class_choices = {
    1: [numb],
    2: [cap],
    3: [low],
    4: [numb, cap],
    5: [numb, low],
    6: [numb, cap, low],
    7: [cap, low],
}

# Keep asking until the answer is one of the menu numbers. Input that is not
# an integer counts as an invalid choice instead of raising ValueError.
choice = 0
while choice not in class_choices:
    try:
        choice = int(raw_input('''
1) Numbers
2) Capital Letters
3) Lowercase Letters
4) Numbers + Capital Letters
5) Numbers + Lowercase Letters
6) Numbers + Capital Letters + Lowercase Letters
7) Capital Letters + Lowercase Letters
: '''))
    except ValueError:
        choice = 0

# Collect the character codes of every selected class, in menu order.
poss = []
for code_range in class_choices[choice]:
    poss += code_range

# The alphabet as one-character strings.
bigList = [chr(code) for code in poss]

MIN = int(raw_input("What is the min size of the word? "))
MAX = int(raw_input("What is the max size of the word? "))

# Write every word of each length from MIN to MAX (inclusive), one per line.
for i in range(MIN, MAX + 1):
    for s in xselections(bigList, i):
        f.write(''.join(s) + '\n')

# Close the output file so buffered words are flushed to disk.
f.close()
You can encapsulate the file rotation behavior in a class. Each time you write some data, the write method first checks whether the write would exceed the file size limit; if it would, it calls the rotate method, which closes the current file and opens a new one, incrementing the sequence number in the filename:
import os
class LimitWriter(object):
    """Write data to a numbered series of files, each capped at a byte limit.

    Files are named ``<basepath>.000000``, ``<basepath>.000001``, and so on.
    The first file is opened on construction. A new file is started whenever
    the next write would push the current file past ``bytelimit``. A single
    write is never split across files, so one record larger than the limit
    gets a file to itself.

    Call ``close()`` when done, or use the instance as a context manager, so
    that the last file is flushed and closed.
    """

    def __init__(self, basepath, bytelimit):
        """Open the first output file.

        basepath  -- path prefix; the sequence number is appended after a dot.
        bytelimit -- maximum number of bytes to put in one file.
        """
        self._basepath = basepath
        self._bytelimit = bytelimit
        self._sequence = 0
        self._output = None
        self._bytes = 0
        self._rotate()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def _rotate(self):
        """Close the current file, if any, and open the next one in sequence."""
        self.close()
        path = '%s.%06d' % (self._basepath, self._sequence)
        self._output = open(path, 'wb')
        self._bytes = 0
        self._sequence += 1

    def close(self):
        """Close the current output file. Calling this more than once is safe."""
        if self._output is not None:
            self._output.close()
            self._output = None

    def write(self, data):
        """Append ``data`` to the current file, rotating first if needed.

        Text is encoded as UTF-8 so the limit is enforced on the bytes that
        actually reach the disk; byte strings are written unchanged.
        Raises ValueError if the writer has been closed.
        """
        if self._output is None:
            raise ValueError('write to closed LimitWriter')
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        size = len(data)
        # Rotate only when the current file already holds data; otherwise an
        # oversized record would leave an empty file behind.
        if self._bytes and (self._bytes + size) > self._bytelimit:
            self._rotate()
        self._bytes += size
        self._output.write(data)
# Stream every generated word into rotating files named wordlist.NNNNNN.
out = LimitWriter('wordlist', 1024 * 1024 * 1)
for length in range(MIN, MAX + 1):
    for letters in xselections(bigList, length):
        out.write(''.join(letters) + '\n')
This would output a series of files, each no larger than 1 MB:
1.0M wordlist.000000
1.0M wordlist.000001
252K wordlist.000002
Update - A few more tips on using some of the built-in power of Python to help make your code a bit shorter and easier to follow. I've included comments explaining each part.
Here are the docs on the modules I use below: itertools, string.
import itertools
import os
from string import digits, lowercase, uppercase
# PUT LimitWriter CLASS DEFINITION HERE
# Maximum size of each output file, in bytes.
LIMIT = 1024 * 1024 * 1

# replace your ranges and large if/else with this
choices = {
    1: digits,
    2: uppercase,
    3: lowercase,
    # Option 4 is "Numbers + Capital Letters" in the menu below.
    4: digits + uppercase,
    5: digits + lowercase,
    6: digits + uppercase + lowercase,
    7: uppercase + lowercase,
}

# Keep asking until the answer is one of the menu numbers. Input that is not
# an integer counts as an invalid choice instead of raising ValueError.
choice = 0
while choice not in choices:
    try:
        choice = int(raw_input('''
1) Numbers
2) Capital Letters
3) Lowercase Letters
4) Numbers + Capital Letters
5) Numbers + Lowercase Letters
6) Numbers + Capital Letters + Lowercase Letters
7) Capital Letters + Lowercase Letters
: '''))
    except ValueError:
        choice = 0

MIN = int(raw_input("What is the min size of the word? "))
MAX = int(raw_input("What is the max size of the word? "))

# pick one of the sets with the user's choice
chars = choices[int(choice)]
out = LimitWriter('wordlist', LIMIT)
# Generate every word of each length from MIN to MAX. product() is used, not
# permutations(), so a character may repeat within a word (e.g. "aa"),
# matching what xselections produced.
for length in range(MIN, MAX + 1):
    for tmp in itertools.product(chars, repeat=length):
        out.write(''.join(tmp) + '\n')
Here's the final working code. To change the size cap of each output file, adjust file_size_limit in generate_wordlist (about 10 MB by default); a new file is started whenever the next word would push the current file past that size. The code has been updated to run under Python 3.2.
import itertools
import subprocess
import os
from string import digits, ascii_lowercase, ascii_uppercase, punctuation
def clear_console():
    """Clear the terminal by running ``cls`` on Windows, ``clear`` elsewhere."""
    command = "cls" if os.name == 'nt' else "clear"
    subprocess.call(command, shell=True)
def generate_phone_numbers(area_code, path='phones.txt', first=2010000,
                           last=9999999):
    """Write one phone number per line to ``path``.

    Each line is ``area_code`` followed by a subscriber number, for every
    number from ``first`` to ``last`` inclusive.

    area_code -- string prefix written in front of every number.
    path      -- output file; defaults to ``phones.txt`` in the current
                 directory.
    first     -- first subscriber number to write.
    last      -- last subscriber number to write. It is included, so the
                 default range ends with 9999999.
    """
    # The with-block guarantees the file is flushed and closed, even if a
    # write fails part-way through.
    with open(path, 'w') as f:
        for i in range(first, last + 1):
            f.write(area_code + str(i) + '\n')
def generate_wordlist(lst_chars, min_digit, max_digit, lst_name,
                      file_size_limit=1024000 * 10):
    """Write every word over ``lst_chars`` to a series of size-capped files.

    Words of each length from ``min_digit`` to ``max_digit`` (inclusive) are
    generated with repetition allowed and written one per line.

    lst_chars       -- the alphabet: a string or sequence of characters.
    min_digit       -- shortest word length to generate.
    max_digit       -- longest word length to generate.
    lst_name        -- base path of the output files; ``file_writer`` appends
                       ``.000000``, ``.000001``, and so on.
    file_size_limit -- maximum bytes per output file. The default is the
                       10240000-byte cap the function previously hard-coded.
    """
    out = file_writer(lst_name, file_size_limit)
    try:
        for curr_length in range(min_digit, max_digit + 1):
            for curr_digit in itertools.product(lst_chars, repeat=curr_length):
                out.write(''.join(curr_digit) + '\n')
    finally:
        # Always flush and close the last file, even if generation fails.
        out.close()


class file_writer(object):
    """Write text to a numbered series of files, each capped at a byte limit.

    Files are named ``<basepath>.000000``, ``<basepath>.000001``, and so on.
    The first file is opened on construction. A new file is started whenever
    the next write would push the current file past ``bytelimit``. A single
    write is never split across files.
    """

    def __init__(self, basepath, bytelimit):
        """Open the first output file.

        basepath  -- path prefix; the sequence number is appended after a dot.
        bytelimit -- maximum number of bytes to put in one file.
        """
        self._basepath = basepath
        self._bytelimit = bytelimit
        self._sequence = 0
        self._output = None
        self._bytes = 0
        self._rotate()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def _rotate(self):
        """Close the current file, if any, and open the next one in sequence."""
        self.close()
        path = '%s.%06d' % (self._basepath, self._sequence)
        self._output = open(path, 'wb')
        self._bytes = 0
        self._sequence += 1

    def close(self):
        """Close the current output file. Calling this more than once is safe."""
        if self._output is not None:
            self._output.close()
            self._output = None

    def write(self, data):
        """Append ``data`` to the current file, rotating first if needed.

        Text is encoded as UTF-8 once, and the encoded length is what counts
        against the limit; byte strings are written unchanged.
        Raises ValueError if the writer has been closed.
        """
        if self._output is None:
            raise ValueError('write to closed file_writer')
        if not isinstance(data, bytes):
            data = data.encode("utf-8")
        size = len(data)
        # Rotate only when the current file already holds data; otherwise an
        # oversized record would leave an empty file behind.
        if self._bytes and (self._bytes + size) > self._bytelimit:
            self._rotate()
        self._bytes += size
        self._output.write(data)
# Menu option -> (alphabet, output base name, description used in the
# progress message) for the wordlist options. Option 1 (phone numbers) is
# handled separately below.
wordlist_options = {
    2: (digits, 'num', 'numbers'),
    3: (digits + ascii_lowercase, 'num_low', 'numbers & lowercase'),
    4: (digits + ascii_lowercase + ascii_uppercase, 'num_low_upp',
        'numbers, lowercase & uppercase'),
    5: (digits + ascii_lowercase + ascii_uppercase + punctuation,
        'num_low_upp_pun',
        'numbers, lowercase, uppercase & punctuation'),
}

# Redisplay the menu until a valid option number is entered. Input that is
# not an integer counts as an invalid choice instead of raising ValueError.
choice = 0
while choice not in range(1, 6):
    clear_console()
    print('')
    print(' wgen - Menu')
    try:
        choice = int(input('''
1. Phone numbers.
2. Numbers.
3. Numbers + Lowercase.
4. Numbers + Lowercase + Uppercase.
5. Numbers + Lowercase + Uppercase + Punctuation.
Enter Option: '''))
    except ValueError:
        choice = 0
print('')

if choice == 1:
    area_code = input('''
Please enter Area Code: ''')
    area_code = str(area_code).strip()
    if len(area_code) == 3:
        print('')
        print(' Generating phone numbers for area code ' + area_code + '.')
        print(' Please wait...')
        generate_phone_numbers(area_code)
    else:
        # Say why nothing happened instead of exiting silently.
        print(' Area code must be exactly 3 characters; nothing generated.')
else:
    # Options 2-5 differ only in alphabet, file name and message wording.
    lst_chars, lst_name, description = wordlist_options[choice]
    min_digit = int(input(' Minimum digit? '))
    print('')
    max_digit = int(input(' Maximum digit? '))
    print('')
    print(' Generating ' + description + ' between ' + str(min_digit) + ' and ' + str(max_digit) + ' digits.')
    print(' Please wait...')
    generate_wordlist(lst_chars, min_digit, max_digit, lst_name)
精彩评论