Python: Sort text file twice and split to different files?
How do I sort first by line length, then alphabetically, then split by line length into separate files? I have a word list file like so:
a
actors
an
b
batter
but
I require one file (1.txt, 2.txt) for each line length, each sorted alphabetically. How might this be done?
The resulting file should look like:
1.txt
a
b
...
2.txt
an
by
...
etc.
from collections import defaultdict
# Builds the output file name for a given word length, e.g. OUTF(3) -> "3.txt".
OUTF = "{0}.txt".format
def sortWords(wordList):
    """Group words by their length.

    Despite the name, nothing is ordered here: the words in each group keep
    the order in which they appear in ``wordList``.

    Args:
        wordList: Iterable of strings.

    Returns:
        A ``defaultdict(list)`` mapping each word length to the list of
        words that have that length.
    """
    groups = defaultdict(list)
    for word in wordList:
        groups[len(word)].append(word)
    return groups
def readWords(fname):
    """Read the non-blank lines of a text file.

    Args:
        fname: Path of the file to read.

    Returns:
        A list with one entry per line, with leading and trailing whitespace
        stripped; lines that are empty after stripping are omitted.
    """
    with open(fname) as inf:
        # Iterate the file object directly rather than calling readlines(),
        # which would build a throwaway list of every line first.
        stripped = (line.strip() for line in inf)
        return [word for word in stripped if word]
def writeWords(fname, wordList):
    """Write the words to a file in sorted order, one per line.

    The lines are joined with ``'\\n'``, so no newline follows the last word.

    Args:
        fname: Path of the file to create or overwrite.
        wordList: Iterable of strings to write.
    """
    with open(fname, 'w') as outf:
        # sorted() builds a new list, so the caller's list is not reordered
        # as a hidden side effect of writing it out.
        outf.write('\n'.join(sorted(wordList)))
def main():
    """Split ``words.txt`` into one alphabetically sorted file per word length.

    Reads the words, groups them by length, and writes each group to the
    file named by ``OUTF`` for that length.
    """
    # .items() works on both Python 2 and Python 3, whereas .iteritems()
    # exists only on Python 2 dictionaries.
    for wordLen, wordList in sortWords(readWords('words.txt')).items():
        writeWords(OUTF(wordLen), wordList)
# Run the split only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
You can pass a key function into sort. Something like key=lambda w: (len(w), w) (tuples compare element by element, so this orders by length first and then alphabetically)
ought to do it.
to add to the previous answer:
# Write each word of `words` (an iterable of strings supplied by the
# surrounding code) to "<length>.txt", in alphabetical order within each file.
files = {}  # word length -> open output file
try:
    # sorted() with a (length, word) key orders by length first and then
    # alphabetically; a comparator that returns True/False is not a valid
    # cmp function and is not accepted at all on Python 3.
    for word in sorted(words, key=lambda w: (len(w), w)):
        n = len(word)
        if n not in files:
            files[n] = open("{0}.txt".format(n), "w")
        files[n].write("{0}\n".format(word))
finally:
    # Close every file that was opened, even if a write failed part-way.
    for handle in files.values():
        handle.close()
You can do it like this:
# Sample word list: one word per line, stripped, with blank lines dropped.
text = [x.strip() for x in """a
actors
an
b
batter
but""".splitlines() if x.strip()]

files = {}  # word length -> open output file
try:
    # Sort first so every output file is alphabetical even when the input
    # is not already ordered.
    for word in sorted(text):
        n = len(word)
        if n not in files:
            files[n] = open("%d.txt" % n, 'wt')
        files[n].write(word + "\n")
finally:
    # .values() works on Python 2 and 3 (.itervalues() is Python 2 only), and
    # the finally clause closes every opened file even if a write fails.
    for handle in files.values():
        handle.close()
精彩评论