# If any of the results are wrong, please point it out.
import numpy as np
def loadDataSet():
    """Return the built-in toy corpus and its class labels.

    Returns:
        tuple: ``(postingList, classVec)``. ``postingList`` is a list of six
        tokenized posts (each a list of word strings); ``classVec`` is the
        parallel list of 0/1 labels, one per post. Both lists are built
        fresh on every call.

    NOTE(review): label 1 presumably marks an abusive post -- trainNB0 names
    the prior of that class ``pAbusive``; confirm.
    """
    postingList = [
        ['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],
        ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],
        ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],
        ['stop', 'posting', 'stupid', 'worthless', 'garbage'],
        ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
        ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid'],
    ]
    classVec = [0, 1, 0, 1, 0, 1]
    return postingList, classVec
def createVocaList(dataSet):
    """Collect every distinct word that occurs in ``dataSet``.

    Args:
        dataSet: iterable of documents, each an iterable of hashable words.

    Returns:
        list: the distinct words, ordered by first appearance.
    """
    # dict keys drop duplicates and keep insertion order, so the vocabulary
    # (and hence the feature order built from it) is identical on every run.
    # Iterating a set of strings instead would reorder the words whenever the
    # interpreter's string hash seed changes.
    allWords = (word for document in dataSet for word in document)
    return list(dict.fromkeys(allWords))
def setofwords2Vec(feature, inputset):
    """Encode a document as a 0/1 word-presence vector over ``feature``.

    Args:
        feature: sequence of vocabulary words; a word's position in this
            sequence is its index in the returned vector.
        inputset: iterable of the words found in the document.

    Returns:
        list: ``len(feature)`` integers, 1 at the position of every
        vocabulary word that occurs in ``inputset`` and 0 elsewhere.

    A word that is not in ``feature`` is reported on stdout and otherwise
    ignored.
    """
    # Build the word -> position map once instead of scanning the vocabulary
    # twice per word. setdefault keeps the FIRST position of a repeated word,
    # which is what list.index() would have returned.
    position = {}
    for idx, vocabWord in enumerate(feature):
        position.setdefault(vocabWord, idx)

    returnVec = [0] * len(feature)
    for word in inputset:
        idx = position.get(word)
        if idx is None:
            print('the word:%s is not in my vocabList'%word)
        else:
            returnVec[idx] = 1
    return returnVec
def trainNB0(wordSetVec, labels):
    """Estimate two-class naive Bayes parameters from word vectors.

    Args:
        wordSetVec: non-empty sequence of equal-length numeric vectors, one
            per training document.
        labels: one label per document. A document whose label equals 1 is
            counted for class 1; any other label is counted for class 0.

    Returns:
        tuple: ``(p0Vec, p1Vec, pAbusive)``. ``p0Vec`` and ``p1Vec`` are
        NumPy arrays holding, per feature, the natural log of the smoothed
        count ratio for class 0 and class 1. ``pAbusive`` is
        ``sum(labels) / len(wordSetVec)``.

    Raises:
        ValueError: if ``wordSetVec`` is empty or ``labels`` has a different
            length.
    """
    numDataSet = len(wordSetVec)
    if numDataSet == 0:
        raise ValueError('trainNB0 needs at least one training vector')
    if len(labels) != numDataSet:
        raise ValueError(
            'labels has %d entries but wordSetVec has %d'
            % (len(labels), numDataSet)
        )

    pAbusive = sum(labels) / numDataSet

    counts = np.asarray(wordSetVec, dtype=float)
    isClass1 = np.asarray(labels) == 1
    class1Rows = counts[isClass1]
    class0Rows = counts[~isClass1]

    # Per-word counts start at 1 and the totals at 2, so a word never seen in
    # a class still gets a non-zero ratio and np.log never receives 0.
    p1Num = 1 + class1Rows.sum(axis=0)
    p1Denom = 2 + class1Rows.sum()
    p0Num = 1 + class0Rows.sum(axis=0)
    p0Denom = 2 + class0Rows.sum()

    # Logs are returned so the classifier can add terms instead of
    # multiplying many small probabilities.
    p0Vec = np.log(p0Num / p0Denom)
    p1Vec = np.log(p1Num / p1Denom)
    return p0Vec, p1Vec, pAbusive
def NBC(TestWordsVec, p0Vec, p1Vec, pclasses):
    """Classify one word vector with trained naive Bayes parameters.

    Args:
        TestWordsVec: numeric feature vector of the document to classify.
        p0Vec: per-feature log values for class 0, same length as
            ``TestWordsVec``.
        p1Vec: per-feature log values for class 1, same length as
            ``TestWordsVec``.
        pclasses: prior probability of class 1; class 0 uses
            ``1 - pclasses``.

    Returns:
        str: ``"1标签"`` when the class-1 log score is strictly larger,
        otherwise ``"0标签"``.
    """
    testVec = np.asarray(TestWordsVec)
    # Log scores: sum of the log terms of the present words plus the log prior.
    p1 = np.dot(testVec, p1Vec) + np.log(pclasses)
    p0 = np.dot(testVec, p0Vec) + np.log(1 - pclasses)
    if p1 > p0:
        return "1标签"
    # Covers p1 < p0 and also an exact tie, so a label is always returned.
    return "0标签"
def testingNBC():
    """Train on the built-in corpus and print the class of two sample posts.

    Prints the corpus, the vocabulary, the training vectors and the trained
    parameters along the way. Takes no arguments and returns nothing.
    """
    listOPosts, listclasses = loadDataSet()
    print('词汇列表:', listOPosts)

    MyvocabList = createVocaList(listOPosts)
    print('特征属性:', MyvocabList)

    # One word-presence vector per training post.
    trainMatrix = []
    for postinDoc in listOPosts:
        trainMatrix.append(setofwords2Vec(MyvocabList, postinDoc))
    print('特征向量:', trainMatrix)

    p0, p1, pA = trainNB0(trainMatrix, listclasses)
    print('概率:', p0, p1, pA)

    testEntry = ['love', 'my', 'dalmation']
    testVec = setofwords2Vec(MyvocabList, testEntry)
    print('分类器分类的类别为:', NBC(testVec, p0, p1, pA))

    testEntry1 = ['stupid', 'garbage']
    testVec1 = setofwords2Vec(MyvocabList, testEntry1)
    print('分类器分类的类别:', NBC(testVec1, p0, p1, pA))
# Run the demo. NOTE: this call is unguarded, so it also runs when the module
# is imported.
testingNBC()