World's Best AI Learning Platform with profoundly Demanding Certification Programs
Designed by IITian's, only for AI Learners.
Designed by IITian's, only for AI Learners.
New to InsideAIML? Create an account
Employer? Create an account
Download our e-book of Introduction To Python
4.5 (1,292 Ratings)
559 Learners
Neha Kumawat
a year ago
from nltk.tokenize import word_tokenize
from nltk.corpus import gutenberg
sample = gutenberg.raw("blake-poems.txt")
token = word_tokenize(sample)
wlist = []
for i in range(50):
wlist.append(token[i])
wordfreq = [wlist.count(w) for w in wlist]
print("Pairs\n" + str(zip(token, wordfreq)))
[([', 1), (Poems', 1), (by', 1), (William', 1), (Blake', 1), (1789', 1), (]', 1), (SONGS', 2), (OF', 3), (INNOCENCE', 2), (AND', 1), (OF', 3), (EXPERIENCE', 1), (and', 1), (THE', 1), (BOOK', 1), (of', 2), (THEL', 1), (SONGS', 2), (OF', 3), (INNOCENCE', 2), (INTRODUCTION', 1), (Piping', 2), (down', 1), (the', 1), (valleys', 1), (wild', 1), (,', 3), (Piping', 2), (songs', 1), (of', 2), (pleasant', 1), (glee', 1), (,', 3), (On', 1), (a', 2), (cloud', 1), (I', 1), (saw', 1), (a', 2), (child', 1), (,', 3), (And', 1), (he', 1), (laughing', 1), (said', 1), (to', 1), (me', 1), (:', 1), (``', 1)]
import nltk
#from nltk.tokenize import word_tokenize
from nltk.corpus import brown
cfd = nltk.ConditionalFreqDist(
(genre, word)
for genre in brown.categories()
for word in brown.words(categories=genre))
categories = ['hobbies', 'romance','humor']
searchwords = [ 'may', 'might', 'must', 'will']
cfd.tabulate(conditions=categories, samples=searchwords)
may might must will
hobbies 131 22 83 264
romance 11 51 45 43
humor 8 8 9 13