1. KNN算法
2. 朴素贝叶斯算法
题目:20类新闻分类
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
def nb_news():
    """Classify the 20 newsgroups texts with a multinomial naive Bayes model.

    The steps are: fetch the dataset, split it into train and test parts,
    convert the raw texts into TF-IDF features, fit ``MultinomialNB`` on the
    training part, then print the predictions, an element-wise comparison of
    predictions against the true labels, and the accuracy score on the test
    part.

    Returns:
        None. All results are written to standard output.
    """
    # 1) Load the data, requesting the 'all' subset.
    corpus = fetch_20newsgroups(subset='all')

    # 2) Split texts and labels into train / test parts (library defaults).
    docs_train, docs_test, labels_train, labels_test = train_test_split(
        corpus.data, corpus.target
    )

    # 3) Text feature extraction: the vectorizer is fitted on the training
    #    texts only and then reused, unchanged, to transform the test texts.
    vectorizer = TfidfVectorizer()
    features_train = vectorizer.fit_transform(docs_train)
    features_test = vectorizer.transform(docs_test)

    # 4) Fit the naive Bayes estimator on the training features.
    classifier = MultinomialNB()
    classifier.fit(features_train, labels_train)

    # 5) Model evaluation.
    # Method 1: compare predicted labels with the true labels directly.
    predicted = classifier.predict(features_test)
    print('y_predict\n', predicted)
    print('真实值和预测值对比\n', predicted == labels_test)

    # Method 2: compute the accuracy on the test part.
    accuracy = classifier.score(features_test, labels_test)
    print('准确率为\n', accuracy)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    nb_news()
结果
|