一共三个文件
- Hlm.py
- stop_words.txt
- 红楼梦.txt
(提示:stop_words.txt 和 红楼梦.txt 两个文本文件的下载链接已放在文章最后)
废话不多说,直接上效果图和代码:
- 效果图
- Hlm.py 代码部分:
import matplotlib.pyplot as plt
import jieba
import wordcloud
import matplotlib
# Select a Chinese-capable sans-serif font for matplotlib text.
# 'simple' is not a font family name, so matplotlib would fall back to its
# default font; 'SimHei' corresponds to the simhei.ttf used for the word cloud.
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
# Alternate spellings / mis-segmented tokens mapped to one canonical name so
# that their occurrences are counted together.
_NAME_ALIASES = {
    "凤姐儿": "凤姐",
    "林黛玉": "黛玉",
    "林妹妹": "黛玉",
    "黛玉笑": "黛玉",
    "宝二爷": "宝玉",
    "袭人道": "袭人",
}


def wordFreq(filepath, text, topn):
    """Segment *text*, print the most frequent words and return the kept tokens.

    The text is segmented with ``jieba.lcut``. Single-character tokens and
    tokens listed in ``stop_words.txt`` (one entry per line, UTF-8) are
    dropped; the remaining tokens are normalised through ``_NAME_ALIASES``
    and counted.

    Args:
        filepath: Unused; kept so existing callers keep working.
        text: The full text to analyse.
        topn: How many of the most frequent words to print as ``word:count``.
            If fewer distinct words exist, all of them are printed.

    Returns:
        The list of kept (normalised) tokens in their original order,
        duplicates included.
    """
    # A set gives O(1) membership tests inside the token loop, and the
    # context manager closes the file as soon as it has been read.
    with open('stop_words.txt', 'r', encoding='utf-8') as stop_file:
        stopwords = {line.strip() for line in stop_file}

    counts = {}
    word_clear = []
    for word in jieba.lcut(text.strip()):
        if len(word) == 1 or word in stopwords:
            continue
        # The stop-word check runs on the raw token; only afterwards is the
        # token replaced by its canonical name (the original used `==` here,
        # which compared instead of assigning, so aliases were never merged).
        word = _NAME_ALIASES.get(word, word)
        word_clear.append(word)
        counts[word] = counts.get(word, 0) + 1

    # sorted() is stable, so words with equal counts keep first-seen order.
    items = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    # Slicing (rather than indexing up to topn) avoids an IndexError when
    # there are fewer than topn distinct words.
    for word, count in items[:max(topn, 0)]:
        print(f"{word}:{count}")
    return word_clear
def gen_cloudword(txt):
    """Render *txt* as a word cloud, save it as a PNG and display it.

    Args:
        txt: The text handed to ``WordCloud.generate``.
    """
    cloud_options = {
        'font_path': r'C:\Windows\Fonts\simhei.ttf',
        'width': 1000,
        'height': 860,
        'max_words': 100,
        'margin': 2,
    }
    generator = wordcloud.WordCloud(**cloud_options)
    rendered = generator.generate(txt)

    # Write the image to disk before opening the interactive window.
    rendered.to_file("红楼梦cloud_star.png")

    plt.imshow(rendered)
    plt.axis('off')
    plt.show()
# Read the whole novel; the context manager closes the file once it is read
# (the original left the handle open).
with open('红楼梦.txt', "r", encoding='utf-8') as novel_file:
    text = novel_file.read()
# Print the 10 most frequent words and collect the filtered tokens.
words_clear = wordFreq('红楼梦.txt', text, 10)
# gen_cloudword receives the tokens as one space-separated string.
gen_cloudword(' '.join(words_clear))
txt 文本文件下载链接:https://pan.baidu.com/s/1ZPvhT0rJddGPS4YUoYzzjQ?pwd=1234 (提取码:1234)
|