import os.path, math
def clean_up(s):
    ''' Return a version of string s in which all letters have been
    converted to lowercase and punctuation characters have been stripped
    from both ends of every whitespace-separated word. Inner punctuation
    is left untouched.

    The cleaned words are joined by single spaces, and words made up
    solely of punctuation are dropped. An empty or all-punctuation
    input gives the empty string.
    '''
    punctuation = '''!"',;:.-?)([]<>*#\n\t\r'''
    # split() with no argument already discards newlines, tabs and carriage
    # returns, so strip() only has visible punctuation left to remove.
    stripped = (word.strip(punctuation) for word in s.lower().split())
    # Skip words that were nothing but punctuation (e.g. "--") so they do
    # not leave doubled spaces in the result.
    return " ".join(word for word in stripped if word)
def average_word_length(text):
    ''' Return the average length of all words in text. Do not
    include surrounding punctuation in words.

    text is a non-empty list of strings each ending in a newline.
    At least one line in text contains a word; if none does, the
    division below raises ZeroDivisionError.
    '''
    # Clean the whole text in one pass: clean_up handles multi-word
    # strings, and split() ignores any leading or repeated whitespace.
    words = clean_up(" ".join(text)).split()
    total_chars = sum(len(word) for word in words)
    return float(total_chars) / float(len(words))
def type_token_ratio(text):
    ''' Return the type token ratio (TTR) for this text.
    TTR is the number of different words divided by the total number of words.

    text is a non-empty list of strings each ending in a newline.
    At least one line in text contains a word; if none does, the
    division below raises ZeroDivisionError.
    '''
    # Clean the whole text in one pass: clean_up handles multi-word
    # strings, and split() ignores any leading or repeated whitespace.
    words = clean_up(" ".join(text)).split()
    # A set keeps one copy of each word, so its size is the number of
    # different words (types); len(words) is the number of tokens.
    return float(len(set(words))) / float(len(words))
def hapax_legomana_ratio(text):
''' Return the hapax_legomana ratio for this text.

This is the end of the preview.
Sign up
to
access the rest of the document.
- Spring '08
- GRIES
- word_list
-
Click to edit the document details