{[ promptMessage ]}

Bookmark it

{[ promptMessage ]}

find_author - import os.path math def clean_up(s Return a...

Info icon: This preview shows pages 1–3. Sign up to view the full content.

View Full Document Right Arrow Icon
import os.path
import math


def clean_up(s):
    ''' Return a version of string s in which all letters have been
    converted to lowercase and punctuation characters have been stripped
    from both ends. Inner punctuation is left untouched.

    Only the two ends of s are examined, so callers that want per-word
    cleaning must split their text into words first (see _clean_words).
    '''
    punctuation = '''!"',;:.-?)([]<>*#\n\t\r'''
    # str.strip(chars) removes any run of these characters from both ends
    # only, which is exactly the "inner punctuation untouched" contract.
    return s.lower().strip(punctuation)


def _clean_words(text):
    ''' Return the cleaned, non-empty words of text, in order.

    text is a list of strings (lines). Each line is split on whitespace
    and every token is passed through clean_up. Tokens made up entirely
    of punctuation (for example "--") clean up to the empty string and
    are dropped so they are not counted as words.
    '''
    cleaned = (clean_up(token) for line in text for token in line.split())
    return [word for word in cleaned if word]


def average_word_length(text):
    ''' Return the average length of all words in text. Do not include
    surrounding punctuation in words.

    text is a non-empty list of strings each ending in a newline.
    At least one line in text contains a word; if that precondition is
    violated the division below raises ZeroDivisionError.
    '''
    words = _clean_words(text)
    # float() keeps true division even on interpreters where int / int
    # truncates.
    return sum(len(word) for word in words) / float(len(words))


def type_token_ratio(text):
    ''' Return the type token ratio (TTR) for this text. TTR is the
    number of different words divided by the total number of words.

    text is a non-empty list of strings each ending in a newline.
    At least one line in text contains a word; if that precondition is
    violated the division below raises ZeroDivisionError.
    '''
    words = _clean_words(text)
    # A set keeps one entry per distinct word, so its size is the number
    # of word types.
    return len(set(words)) / float(len(words))


# NOTE(review): the source preview is truncated at this point. Only the
# opening of hapax_legomana_ratio is visible, so it is preserved verbatim
# below (commented out so the module stays importable) rather than being
# reconstructed from guesswork:
#
# def hapax_legomana_ratio(text):
#     ''' Return the hapax_legomana ratio for this text.
Background image of page 2
Image of page 3
This is the end of the preview. Sign up to access the rest of the document.

{[ snackBarMessage ]}