开发者

Wordlist generator: how do I split the output into files of a limited size?

I am trying to get this Python script to create a new file and continue generating word combinations once a certain file size is reached.

# Open the single output file ('wordlist' in the current directory) that the
# generation loop at the end of this script writes every word to.
f=open('wordlist', 'w')

def xselections(items, n):
    """Yield every length-``n`` selection of ``items``, repetition allowed.

    Selections come out as lists, in nested-loop order: the first position
    varies slowest and the last position fastest.

    Args:
        items: Re-iterable sequence (list, tuple, string, ...) to draw from.
        n: Number of elements in each selection. ``0`` yields a single
            empty list.

    Yields:
        list: One selection of ``n`` elements taken from ``items``.
    """
    if n == 0:
        yield []
        return
    # Iterate over the elements directly instead of indexing through
    # xrange(len(items)); this also runs unchanged on Python 2 and 3.
    for head in items:
        for tail in xselections(items, n - 1):
            yield [head] + tail

# ASCII code points of each character class.
# Numbers = 48 - 57
# Capital = 65 - 90
# Lower = 97 - 122
numb = range(48, 58)
cap = range(65, 91)
low = range(97, 123)

# Keep asking until the answer is one of the menu numbers 1-7.
choice = 0
while int(choice) not in range(1, 8):
    choice = raw_input('''
    1) Numbers
    2) Capital Letters
    3) Lowercase Letters
    4) Numbers + Capital Letters
    5) Numbers + Lowercase Letters
    6) Numbers + Capital Letters + Lowercase Letters
    7) Capital Letters + Lowercase Letters
    : ''')

choice = int(choice)

# Collect the code points selected by the chosen menu entry.
poss = []
if choice == 1:
    poss += numb
elif choice == 2:
    poss += cap
elif choice == 3:
    poss += low
elif choice == 4:
    poss += numb
    poss += cap
elif choice == 5:
    poss += numb
    poss += low
elif choice == 6:
    poss += numb
    poss += cap
    poss += low
elif choice == 7:
    poss += cap
    poss += low

# Turn the code points into the one-character strings used to build words.
bigList = [chr(code) for code in poss]

MIN = int(raw_input("What is the min size of the word? "))
MAX = int(raw_input("What is the max size of the word? "))

# Write every word of every length from MIN to MAX, one per line, and make
# sure the output file is closed (and therefore flushed) even on error.
try:
    for i in range(MIN, MAX + 1):
        for s in xselections(bigList, i):
            f.write(''.join(s) + '\n')
finally:
    f.close()


You can encapsulate the file rotation behavior in a class. Before writing any data, the write method first checks whether the write would exceed the file size limit; if it would, it calls the rotate method, which closes the current file and opens a new one, incrementing the sequence number in the filename:

import os

class LimitWriter(object):
    """Write data to a numbered series of files, each capped at a byte limit.

    Files are named ``<basepath>.NNNNNN``, where ``NNNNNN`` is a zero-padded
    sequence number starting at 000000. A new file is started whenever the
    next write would push the current file past the limit.

    The writer can be used as a context manager so the last file is always
    closed::

        with LimitWriter('wordlist', 1024 * 1024) as out:
            out.write('word\\n')
    """

    def __init__(self, basepath, bytelimit):
        """Open the first file of the series.

        Args:
            basepath: Path prefix shared by every file in the series.
            bytelimit: Maximum number of bytes to put in a single file.
        """
        self._basepath = basepath
        self._bytelimit = bytelimit
        self._sequence = 0
        self._output = None
        self._bytes = 0
        self._rotate()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def _rotate(self):
        """Close the current file, if any, and open the next one."""
        self.close()
        path = '%s.%06d' % (self._basepath, self._sequence)
        self._output = open(path, 'wb')
        self._bytes = 0
        self._sequence += 1

    def close(self):
        """Close the current file. Calling this more than once is harmless."""
        if self._output is not None:
            self._output.close()
            self._output = None

    def write(self, data):
        """Append ``data`` to the series, rotating first if it would not fit.

        Args:
            data: Bytes to write. Text is encoded as UTF-8 first, so the
                limit is always checked against the real number of bytes
                and the binary-mode file accepts it on Python 3 as well.

        Raises:
            ValueError: If the writer has already been closed.
        """
        if self._output is None:
            raise ValueError('write to closed LimitWriter')
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        size = len(data)
        # Only rotate when the current file already holds something;
        # otherwise an oversized write would just leave an empty file behind.
        if self._bytes and (self._bytes + size) > self._bytelimit:
            self._rotate()
        self._bytes += size
        self._output.write(data)

# Send the words to 'wordlist.NNNNNN' files capped at 1 MiB (1024 * 1024 bytes).
out = LimitWriter('wordlist', 1024 * 1024)

# Emit every selection of each length from MIN to MAX, one word per line.
for length in range(MIN, MAX + 1):
    for letters in xselections(bigList, length):
        out.write('%s\n' % ''.join(letters))

This would output a series of files, each no larger than 1 MB:

1.0M  wordlist.000000
1.0M  wordlist.000001
252K  wordlist.000002

Update - A few more tips on using some of the built-in power of Python to help make your code a bit shorter and easier to follow. I've included comments explaining each part.

Here are the docs on the modules I use below: itertools, string.

import itertools
import os
from string import digits, lowercase, uppercase

# PUT LimitWriter CLASS DEFINITION HERE

# Size cap, in bytes, for each output file (1 MiB).
LIMIT = 1024 * 1024 * 1

# Keep asking until the answer is one of the menu numbers 1-7. Anything
# that is not a number counts as an invalid choice rather than crashing
# the script with a ValueError.
choice = 0
while choice not in range(1, 8):
    try:
        choice = int(raw_input('''
    1) Numbers
    2) Capital Letters
    3) Lowercase Letters
    4) Numbers + Capital Letters
    5) Numbers + Lowercase Letters
    6) Numbers + Capital Letters + Lowercase Letters
    7) Capital Letters + Lowercase Letters
    : '''))
    except ValueError:
        choice = 0

# Shortest and longest word length to generate (both inclusive).
MIN = int(raw_input("What is the min size of the word? "))
MAX = int(raw_input("What is the max size of the word? "))

# replace your ranges and large if/else with this:
# each menu number maps straight to the characters it selects.
choices = {
    1: digits,
    2: uppercase,
    3: lowercase,
    # Option 4 is "Numbers + Capital Letters" in the menu, so it must be
    # digits + uppercase (not uppercase + lowercase, which is option 7).
    4: digits + uppercase,
    5: digits + lowercase,
    6: digits + uppercase + lowercase,
    7: uppercase + lowercase,
    }

# pick one of the sets with the user's choice
chars = choices[int(choice)]

out = LimitWriter('wordlist', LIMIT)

# Generate every word of each length from MIN to MAX.
# itertools.product(chars, repeat=length) behaves like `length` nested loops
# over chars, so a character may appear more than once in a word.
# itertools.permutations() never reuses a character and would silently skip
# words such as "aa" or "aba".
for length in range(MIN, MAX+1):
    for tmp in itertools.product(chars, repeat=length):
        out.write(''.join(tmp) + '\n')


Here's the final working code. To set the size cap of each output file, change which mbXY value is assigned to file_size_limit inside the generate_wordlist function; a new file is started whenever the current one would grow past that size. The code has been updated to run under Python 3.2.

import itertools
import subprocess
import os
from string import digits, ascii_lowercase, ascii_uppercase, punctuation

def clear_console():
    """Clear the terminal by running the platform's clear-screen command.

    Runs ``cls`` when ``os.name`` is ``'nt'`` and ``clear`` otherwise, through
    the shell. The command's exit status is ignored.
    """
    command = "cls" if os.name == 'nt' else "clear"
    subprocess.call(command, shell=True)

def generate_phone_numbers(area_code, path='phones.txt', first=2010000, last=9999999):
    """Write every phone number of an area code to a text file, one per line.

    Each line is ``area_code`` followed by a local number from the range
    ``first`` to ``last``.

    Args:
        area_code: Area-code prefix, as a string.
        path: File to (over)write. Defaults to ``phones.txt``.
        first: Lowest local number to generate.
        last: Highest local number to generate, inclusive. With the default,
            9999999 is included; an exclusive ``range`` end would drop it.
    """
    # The with-block guarantees the file is flushed and closed, even if
    # writing fails part-way through.
    with open(path, 'w') as f:
        f.writelines(
            area_code + str(number) + '\n'
            for number in range(first, last + 1)
        )

def generate_wordlist(lst_chars, min_digit, max_digit, lst_name, file_size_limit=None):
    """Write every word over ``lst_chars`` to size-capped, numbered files.

    Words of each length from ``min_digit`` to ``max_digit`` (inclusive) are
    produced with ``itertools.product``, so characters may repeat, and are
    written one per line through ``file_writer``.

    Args:
        lst_chars: Characters to build words from.
        min_digit: Length of the shortest words to generate.
        max_digit: Length of the longest words to generate.
        lst_name: Base path handed to ``file_writer`` for the output files.
        file_size_limit: Size cap in bytes for each output file. ``None``
            (the default) selects the ``mb10`` preset below.
    """
    # Size presets in bytes; each "mb" here is 1,024,000 bytes. They are
    # kept as a ready-made menu of values for ``file_size_limit``.
    mb1   = 1024000
    mb10  = mb1 * 10
    mb100 = mb1 * 100
    mb250 = mb1 * 250
    mb500 = mb1 * 500
    gb1   = mb1 * 1000
    if file_size_limit is None:
        # Default cap; assign a different preset here to change it.
        file_size_limit = mb10
    out = file_writer(lst_name, file_size_limit)
    for curr_length in range(min_digit, max_digit + 1):
        for curr_digit in itertools.product(lst_chars, repeat=curr_length):
            out.write(''.join(curr_digit) + '\n')

class file_writer(object):
    """Write text to a numbered series of files, each capped at a byte limit.

    Files are named ``<basepath>.NNNNNN``, where ``NNNNNN`` is a zero-padded
    sequence number starting at 000000. A new file is started whenever the
    next write would push the current file past the limit. The writer can be
    used in a ``with`` statement so that the last file is always closed.
    """

    def __init__(self, basepath, bytelimit):
        """Open the first file of the series.

        Args:
            basepath: Path prefix shared by every file in the series.
            bytelimit: Maximum number of bytes to put in a single file.
        """
        self._basepath = basepath
        self._bytelimit = bytelimit
        self._sequence = 0
        self._output = None
        self._bytes = 0
        self._rotate()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def _rotate(self):
        """Close the current file, if any, and open the next one."""
        self.close()
        path = '%s.%06d' % (self._basepath, self._sequence)
        self._output = open(path, 'wb')
        self._bytes = 0
        self._sequence += 1

    def close(self):
        """Close the current file. Calling this more than once is harmless."""
        if self._output is not None:
            self._output.close()
            self._output = None

    def write(self, data):
        """Append ``data`` to the series, rotating first if it would not fit.

        Args:
            data: Text to write (encoded as UTF-8), or already-encoded bytes.

        Raises:
            ValueError: If the writer has already been closed.
        """
        if self._output is None:
            raise ValueError('write to closed file_writer')
        # Encode before measuring: the limit is in bytes, and a non-ASCII
        # character takes more than one byte in UTF-8.
        if isinstance(data, str):
            data = bytes(data, "utf-8")
        size = len(data)
        # Only rotate when the current file already holds something;
        # otherwise an oversized write would just leave an empty file behind.
        if self._bytes and (self._bytes + size) > self._bytelimit:
            self._rotate()
        self._bytes += size
        self._output.write(data)

# Show the menu until the user enters one of the option numbers 1-5.
# ``choice`` ends up as an int. Input that is not a number is treated as an
# invalid choice and the menu is shown again, instead of raising ValueError.
choice = 0

while choice not in range(1, 6):
    clear_console()
    print('')
    print('  wgen - Menu')
    try:
        choice = int(input('''
  1. Phone numbers.
  2. Numbers.
  3. Numbers + Lowercase.
  4. Numbers + Lowercase + Uppercase.
  5. Numbers + Lowercase + Uppercase + Punctuation.

  Enter Option: '''))
    except ValueError:
        choice = 0

print('')

# Option 1: write every phone number for a three-digit area code.
if choice == 1:
    area_code = input('''
  Please enter Area Code: ''').strip()
    # Accept exactly three ASCII digits, and say so when the input is
    # rejected rather than exiting without any output.
    if len(area_code) == 3 and all(ch in digits for ch in area_code):
        print('')
        print('  Generating phone numbers for area code ' + area_code + '.')
        print('  Please wait...')
        generate_phone_numbers(area_code)
    else:
        print('')
        print('  Invalid area code: expected exactly 3 digits.')

# Options 2-5 share one flow and differ only in the alphabet, the base name
# of the output files and the progress message shown to the user.
_wordlist_options = {
    2: (digits,
        'num',
        '  Generating numbers between '),
    3: (digits + ascii_lowercase,
        'num_low',
        '  Generating numbers & lowercase between '),
    4: (digits + ascii_lowercase + ascii_uppercase,
        'num_low_upp',
        '  Generating numbers, lowercase & uppercase between '),
    5: (digits + ascii_lowercase + ascii_uppercase + punctuation,
        'num_low_upp_pun',
        '  Generating numbers, lowercase, uppercase & punctuation between '),
}

if choice in _wordlist_options:
    # Ask for the shortest and longest word length (both inclusive).
    min_digit = int(input('  Minimum digit? '))
    print('')
    max_digit = int(input('  Maximum digit? '))
    lst_chars, lst_name, _progress_prefix = _wordlist_options[choice]
    print('')
    print(_progress_prefix + str(min_digit) + ' and ' + str(max_digit) + ' digits.')
    print('  Please wait...')
    generate_wordlist(lst_chars, min_digit, max_digit, lst_name)
0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜