find_author - import os.path, math def clean_up(s): '...

This preview shows pages 1–3. Sign up to view the full content.

View Full Document Right Arrow Icon
import math
import os.path


def clean_up(s):
    ''' Return a lowercase version of string s in which punctuation has been
    stripped from both ends of every whitespace-separated word.

    Inner punctuation (e.g. the apostrophe in "don't" or the hyphen in
    "well-known") is left untouched. Words are joined back with single
    spaces, and tokens that consist only of punctuation are dropped, so the
    result can be handed straight to str.split(). For a single word this is
    simply s.lower() with punctuation stripped from both ends.
    '''

    punctuation = '''!"',;:.-?)([]<>*#\n\t\r'''
    # Strip per token rather than on the whole string, so punctuation that
    # sits between two words is removed while punctuation inside a word stays.
    stripped = (token.strip(punctuation) for token in s.lower().split())
    return " ".join(token for token in stripped if token)


def _words(text):
    ''' Return the list of cleaned-up words found in text, a list of strings. '''

    return clean_up(" ".join(text)).split()


def average_word_length(text):
    ''' Return the average length of all words in text. Do not include
    surrounding punctuation in words.

    text is a non-empty list of strings each ending in a newline. At least
    one line in text contains a word; if no word is present the division
    below raises ZeroDivisionError.
    '''

    word_list = _words(text)
    total_chars = sum(len(word) for word in word_list)
    # float() keeps this a true division on Python 2 as well as Python 3.
    return float(total_chars) / len(word_list)


def type_token_ratio(text):
    ''' Return the type token ratio (TTR) for this text. TTR is the number of
    different words divided by the total number of words.

    text is a non-empty list of strings each ending in a newline. At least
    one line in text contains a word; if no word is present the division
    below raises ZeroDivisionError.
    '''

    word_list = _words(text)
    # A set keeps one entry per distinct word, so its size is the type count.
    return float(len(set(word_list))) / len(word_list)


# NOTE(review): the source preview is truncated from this point on. The
# original continues with ``def hapax_legomana_ratio(text)``, whose docstring
# begins: "Return the hapax_legomana ratio for this text. This ratio is the
# number of words that occur exactly once divided". The rest of that function
# is not visible here, so it is not reproduced.
Background image of page 2
Image of page 3
This is the end of the preview. Sign up to access the rest of the document.

This note was uploaded on 11/11/2010 for the course CSC 108, taught by Professor Gries during the Spring '08 term at the University of Toronto - Toronto.

Page 1 / 5

find_author - import os.path, math def clean_up(s): '...

This preview shows document pages 1 - 3. Sign up to view the full document.

View Full Document Right Arrow Icon
Ask a homework question - tutors are online