paragraphstats

Paragraphstats - if key in '.!?': sentences += cf_dict[key]; number_words = len(word_list); print("Filename: ", fileName); print("Total lines: %d" % lines)

This preview shows page 1. Sign up to view the full content.

View Full Document Right Arrow Icon
#Tom Pampalone
#11/11/10
#CS-100 Section 05


def paragraphStats(fileName):
    """Print line, sentence, word and frequency statistics for a text file.

    The report goes to standard output and contains the file name, the
    number of lines, sentences and words, the average word length, the
    three most common words and the three most common characters.

    Args:
        fileName: Path of the text file to analyse.

    Returns:
        None. The statistics are only printed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Trailing marks removed from a word before it is counted.
    punctuation = ",.!?;:"

    lines = 0
    word_list = []
    cf_dict = {}
    # The context manager closes the file even if reading fails part-way.
    with open(fileName) as textf:
        for line in textf:
            lines += 1
            word_list.extend(line.split())
            # Character frequencies are case-insensitive and cover every
            # character on the line, including spaces and the newline.
            for char in line.lower():
                cf_dict[char] = cf_dict.get(char, 0) + 1

    # Word frequencies, keyed by the lower-cased word without trailing
    # punctuation. All trailing marks are removed, so "Wait?!" and "wait"
    # are counted as the same word.
    word_dict = {}
    for word in word_list:
        word = word.rstrip(punctuation).lower()
        word_dict[word] = word_dict.get(word, 0) + 1

    # Every sentence-ending mark counts as one sentence.
    sentences = sum(cf_dict.get(mark, 0) for mark in '.!?')
    number_words = len(word_list)

    print("Filename: " , fileName)
    print( "Total lines: %d" % lines )
    print( "Sentences: %d" % sentences )
    print( "Words: %d" % number_words )
    print('-' * 30)

    # Sum the cleaned word lengths weighted by how often each word occurs.
    total_chars = sum(len(word) * count for word, count in word_dict.items())
    # A file without words has no meaningful average; report 0 rather than
    # dividing by zero.
    avg_wordsize = total_chars / float(number_words) if number_words else 0.0

    #MCW = Most Common Word
    # Sorting (count, word) pairs in reverse puts the highest count first;
    # ties are broken by the word itself in descending order.
    mcw = sorted([(v, k) for k, v in word_dict.items()], reverse=True)
    #MCC = Most Common Character
    mcc = sorted([(v, k) for k, v in cf_dict.items()], reverse=True)

    print( "Average word length : %0.2f" % avg_wordsize )
    print( "3 most common words : %s" % mcw[:3] )
    print( "3 most common characters: %s" % mcc[:3] )


# Run the report only when executed as a script, so importing this module
# does not try to open the sample file.
if __name__ == "__main__":
    paragraphStats('callOfTheWild.txt')
View Full Document

This document was uploaded on 04/28/2011.

Ask a homework question - tutors are online