Python 作业 06

  1. 2
  2. 3
  3. 4

2

《红楼梦》词频

1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Print the 20 most frequent multi-character words found in 红楼梦.txt.
from collections import Counter

import jieba

# NOTE(review): "ANSI" looks like the Windows-only alias of the "mbcs" codec
# (the system ANSI code page) -- confirm. On other platforms open() would
# raise LookupError; the file's real encoding (presumably GBK) would have to
# be named explicitly there.
with open("红楼梦.txt", "r", encoding="ANSI") as f:
    txt = f.read()

# jieba.lcut segments the whole text into a list of word strings.
words = jieba.lcut(txt)

# Tokens of length 1 are skipped; every other token is tallied.
counts = Counter(word for word in words if len(word) != 1)

# most_common(20) yields at most 20 (word, count) pairs, highest count first,
# so a text with fewer than 20 distinct words no longer raises IndexError.
for word, count in counts.most_common(20):
    print("{0:<10}{1:>5}".format(word, count))

image-20231016203325170

3

《红楼梦》出场频率前20位角色

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Print the 20 most frequent names/words in 红楼梦.txt after merging character
# aliases and discarding a hand-picked list of non-name words.
from collections import Counter

import jieba

# NOTE(review): "ANSI" looks like the Windows-only alias of the "mbcs" codec
# (the system ANSI code page) -- confirm. On other platforms open() would
# raise LookupError; the file's real encoding (presumably GBK) would have to
# be named explicitly there.
with open("红楼梦.txt", "r", encoding="ANSI") as f:
    txt = f.read()

# jieba.lcut segments the whole text into a list of word strings.
words = jieba.lcut(txt)

# Frequent words that are not character names and must not appear in the
# ranking.
excludes = {
    "一声", "媳妇", "不用", "人家", "妹妹", "看见", "问道", "如何", "二爷", "屋里",
    "丫头", "小丫头", "听说", "什么", "一个", "我们", "那里", "如今", "你们", "说道",
    "知道", "起来", "姑娘", "这里", "出来", "他们", "众人", "自己", "进来", "这样",
    "听见", "这个", "不知", "不是", "没有", "两个", "怎么", "只见", "一面", "大家",
    "只是", "回来", "就是", "东西", "咱们", "告诉", "所以", "出去", "不敢", "这些",
    "只得", "不好", "的话", "不过", "一时", "姐姐", "太太", "奶奶", "过来", "不能",
    "心里", "老爷", "二人", "银子", "今日", "如此", "还有", "几个", "答应", "这么",
    "说话", "只管", "一回", "那边", "这话", "外头", "打发", "自然", "今儿", "罢了",
    "那些",
}

# Alternative forms of a name mapped to the single name they are counted under.
aliases = {
    "宝玉": "贾宝玉",
    "凤姐": "王熙凤",
    "凤姐儿": "王熙凤",
    "老太太": "贾母",
    "宝钗": "薛宝钗",
    "探春": "贾探春",
    "湘云": "史湘云",
    "黛玉": "林黛玉",
}

counts = Counter()
for word in words:
    # Tokens of length 1 are skipped.
    if len(word) == 1:
        continue
    rword = aliases.get(word, word)
    # Excluded words are dropped while counting. The original deleted them
    # afterwards with `del counts[word]`, which raised KeyError whenever an
    # excluded word never occurred in the text.
    if rword in excludes:
        continue
    counts[rword] += 1

# most_common(20) yields at most 20 (word, count) pairs, highest count first,
# so a text with fewer than 20 remaining words no longer raises IndexError.
for word, count in counts.most_common(20):
    print("{0:<10}{1:>5}".format(word, count))

image-20231016203536082

4

《红楼梦》词云

1
2
3
4
5
6
# Render a word cloud of 红楼梦.txt and save it as hlm_name.png.
import wordcloud
import jieba

# NOTE(review): "ANSI" looks like the Windows-only alias of the "mbcs" codec
# (the system ANSI code page) -- confirm. On other platforms open() would
# raise LookupError; the file's real encoding (presumably GBK) would have to
# be named explicitly there.
# Read inside `with` so the file handle is closed once the text is loaded.
with open("红楼梦.txt", "r", encoding="ANSI") as f:
    txt = f.read()

# NOTE(review): msyh.ttc is presumably the Microsoft YaHei font, supplied so
# that Chinese glyphs can be drawn -- confirm the font is available wherever
# this runs.
w = wordcloud.WordCloud(width=1000, font_path="msyh.ttc", height=700)

# The segmented words are joined with spaces before being passed to generate().
w.generate(" ".join(jieba.lcut(txt)))
w.to_file("hlm_name.png")

hlm_name


转载请注明来源，欢迎对文章中的引用来源进行考证，欢迎指出任何有错误或不够清晰的表达。可以在下面评论区评论。