#!/usr/bin/env python
"""ttr.py -- count tokens and types for a type-to-token ratio.

Reads text from standard input, splits every line on whitespace, and
prints two numbers, one per line:

1. the total number of tokens (whitespace-separated words), and
2. the number of types (distinct tokens).

The ratio itself is not printed; it is the second number divided by the
first.
"""
import re
import sys


def cmpItems(x, y):
    """Compare two ``(key, count)`` pairs so that larger counts sort first.

    Args:
        x: A sequence whose element at index 1 is the value to compare.
        y: A sequence whose element at index 1 is the value to compare.

    Returns:
        -1 if ``x[1] > y[1]``, 1 if ``x[1] < y[1]``, and 0 if they are equal
        (the negation of the classic ``cmp(x[1], y[1])``).
    """
    # ``cmp`` no longer exists in Python 3; this expression is the portable
    # spelling of ``-cmp(x[1], y[1])``.
    return (y[1] > x[1]) - (y[1] < x[1])


def count_tokens(stream):
    """Count lines, tokens, and per-token frequencies in *stream*.

    Args:
        stream: An iterable of text lines (for example an open file or a
            list of strings).

    Returns:
        A ``(lines, words, types)`` tuple: the number of lines read, the
        total number of whitespace-separated tokens, and a dict mapping each
        distinct token to the number of times it occurred.
    """
    lines = 0
    words = 0
    types = {}
    # Iterate the stream directly instead of readlines() so the whole input
    # is never held in memory at once.
    for line in stream:
        lines += 1
        tokens = line.split()  # split once; reused for both counts
        words += len(tokens)
        for token in tokens:
            types[token] = types.get(token, 0) + 1
    return lines, words, types


def main():
    """Read standard input and print the token count, then the type count."""
    _, words, types = count_tokens(sys.stdin)
    # Single-argument print(...) emits the same output on Python 2 and 3.
    print(words)
    print(len(types))


if __name__ == "__main__":
    main()
