Hello everyone, I'm Chen Chen~
Today, let me show you how to use Python to "eat melons" (that is, to follow the latest gossip)~
My feed has been flooded with the Wang Leehom gossip these days, and many of my female friends have expressed great shock and anger.
My general impression of Wang Leehom was based only on his high level of education, his fluent English, and his many classics; many of his beautiful songs are still popular everywhere. I didn't expect such behavior.
Today I use Python to scrape the comment sections under the posts of the two parties involved and draw word clouds from them. The main code is as follows:
@retry(stop=stop_after_attempt(7))
def do_requests(uid, pageNum, timeout=10):
    """Fetch one page of the Weibo repost listing and return its HTML text.

    The ``retry`` decorator (presumably tenacity's) re-runs the call when it
    raises, giving up after 7 attempts.

    Args:
        uid: Value substituted into the ``uid`` query parameter.
        pageNum: Value substituted into the ``page`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` raises.
            Without a timeout a stalled connection would block forever and
            the retry logic would never get a chance to run.

    Returns:
        The body of the HTTP response as a string.
    """
    # NOTE(review): a logged-in session cookie is hard-coded below. It will
    # expire and is a credential; consider loading it from configuration.
    headers = {
        "cookie": "SCF=Anhuv5v0Lu8oFE06-PmKm-uqVmUQgSwrLYauTMNCvEmRH0iOd-jT0poB-pgkpX_aJsOYqZjgw_F8TAZ0SL_aE9Q.; _T_WM=32be9637e54d4f58408755d6f8100d5c; SUB=_2A25MueV4DeRhGeRN7lQY8ynEwziIHXVsRYswrDV6PUJbkdAKLRPSkW1NU7D9XCuoP6vJEUUVjb0HcSPigsLzxFaW; SSOLoginState=1639814440",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
    }
    url = "https://weibo.cn/repost/L6w2sfDXb?&uid={}&&page={}".format(uid, pageNum)
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text
def get_comment(html_data):
    """Parse an HTML document and return its ``span.ctt`` elements.

    Args:
        html_data: HTML markup to parse (handed to BeautifulSoup with the
            ``lxml`` parser).

    Returns:
        The result of the CSS selection ``span.ctt`` on the parsed document.
    """
    html_text = BeautifulSoup(html_data, 'lxml')
    return html_text.select("span.ctt")


def jieba_():
    """Segment the saved comments and return the words worth plotting.

    Reads stop words from ``chineseStopWords.txt`` (GBK) and the comments from
    ``comment_data.txt`` (UTF-8), segments the joined comment text with
    ``jieba.lcut`` and keeps only the words that

    * are not stop words,
    * consist solely of characters in the range U+4E00..U+9FA5, and
    * are at least two characters long.

    Returns:
        A list of the selected words, in segmentation order (duplicates kept).
    """
    # Read the stop-word list inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open("chineseStopWords.txt", encoding="GBK") as stop_file:
        stop_words = {line.strip() for line in stop_file}
    # Extra stop words on top of the ones from the file.
    stop_words.update([" reply ", " Is there any "])

    # Each line keeps its trailing newline, exactly as when the lines were
    # appended one by one and then joined.
    with open("comment_data.txt", "r", encoding="utf-8") as comment_data_list:
        text = ", ".join(comment_data_list)

    rule = re.compile(r"^[\u4e00-\u9fa5]+$")
    return [
        word
        for word in jieba.lcut(text, cut_all=False)
        if word not in stop_words and rule.search(word) and len(word) >= 2
    ]
def plot_word_cloud(text):
    """Draw a word cloud from ``text`` and display it.

    Args:
        text: Iterable of strings; they are joined with ", " and the result
            is passed to ``WordCloud.generate``.

    The cloud is masked by the image ``gua_1.jpg`` and rendered with the font
    file ``KAITI.ttf``; both are looked up relative to the working directory.
    """
    # Mask image that gives the cloud its outline, loaded as an array.
    cloud_mask = np.array(Image.open('gua_1.jpg'))

    # Word cloud settings, collected in one place.
    cloud_options = dict(
        background_color='white',  # white canvas
        mask=cloud_mask,           # outline taken from the mask image
        max_words=200,             # upper bound on the number of words drawn
        font_path='KAITI.ttf',     # font file used for rendering (for Chinese text)
        max_font_size=100,         # largest font size allowed
    )
    wc = WordCloud(**cloud_options)

    # Build the cloud from the joined words, then render and show the image.
    joined_words = ", ".join(text)
    generated = wc.generate(joined_words)
    picture = generated.to_image()
    picture.show()
    # Save the word cloud image to disk
    wc.to_file('melon_1.png')

The word cloud generated from the comment section under the man's post is shown below. As you can see, it is full of abuse and resentment directed at him, and many people want him blacklisted.
As for the comment section under his ex-wife's post, the generated word cloud is shown below. Everyone is encouraging her to be strong, to face life with courage, and to get through this low point in her life.
Isn't it neat? With Python, a large number of keywords can be extracted at once, giving a quick picture of how people view this matter.
Interested readers can give it a try too.
That's all for today's sharing. If you liked it, please give it a like and follow~