April 2018
Beginner
552 pages
13h 58m
English
from sklearn.datasets import fetch_20newsgroups
# Newsgroup identifiers used for training, mapped to human-readable labels.
category_mapping = {
    'misc.forsale': 'Sellings',
    'rec.motorcycles': 'Motorbikes',
    'rec.sport.baseball': 'Baseball',
    'sci.crypt': 'Cryptography',
    'sci.space': 'OuterSpace',
}

# Load only the training split, restricted to the mapped categories.  The
# fixed random_state makes the shuffled order the same from run to run.
training_content = fetch_20newsgroups(
    subset='train',
    categories=category_mapping.keys(),
    shuffle=True,
    random_state=7,
)
from sklearn.feature_extraction.text import CountVectorizer

# Learn the vocabulary from the training documents and convert them into a
# document-term matrix of token counts.
vectorizing = CountVectorizer()
train_counts = vectorizing.fit_transform(training_content.data)

# A single pre-formatted argument inside parentheses prints the same text under
# both the Python 2 print statement and the Python 3 print() function.  The
# leading "\n" (its backslash was lost in the original line) separates this
# report from any earlier output.
print("\nDimensions of training data: {}".format(train_counts.shape))
from sklearn.naive_bayes ...