import requests import hashlib import time import os import csv from hanlp_restful import HanLPClient
def md5_to_w_rid(y, a): input_string = y + a md5_hash = hashlib.md5() md5_hash.update(input_string.encode('utf-8')) w_rid = ''.join(f'{byte:02x}' for byte in md5_hash.digest()) return w_rid
def save_to_csv(s, uname, content, emotion):
    """Append one comment record to ``ba数据分析.csv`` in the working directory.

    The header row is written only when the file did not exist before this
    call; every call appends exactly one data row.

    Args:
        s: Value stored in the '楼层' column.
        uname: Value stored in the '用户名' column.
        content: Value stored in the '评论内容' column.
        emotion: Value stored in the '情感' column.
    """
    target = 'ba数据分析.csv'
    columns = ['情感', '楼层', '用户名', '评论内容']
    record = dict(zip(columns, (emotion, s, uname, content)))

    # Must be evaluated before open(): append mode creates the file.
    needs_header = not os.path.exists(target)
    print('...正在写入文件')
    with open(target, 'a', newline='', encoding='utf-8') as out:
        sink = csv.DictWriter(out, fieldnames=columns)
        if needs_header:
            sink.writeheader()
        sink.writerow(record)
def fetch_comments(oid, headers, cookies, s, *, timeout=10):
    """Fetch one batch of replies for ``oid``, score each one and save it.

    For every reply returned, the user name and message are read, the
    message is passed to HanLP sentiment analysis, the record is printed and
    then appended to the CSV via ``save_to_csv``.

    Args:
        oid: Identifier sent as the ``oid`` query parameter.
        headers: HTTP headers forwarded to ``requests.get``.
        cookies: Cookies forwarded to ``requests.get``.
        s: Running count of replies processed so far; incremented once per
            reply and used as the '楼层' value.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A ``(s, success)`` tuple. ``s`` is the updated running count (it may
        have advanced even when ``success`` is False, if a failure occurred
        partway through the batch). ``success`` is False when there were no
        replies or when any error was raised.
    """
    # Appended to the query string before hashing to produce ``w_rid``.
    # NOTE(review): presumably the WBI signing key for this endpoint; it is
    # hard-coded here - confirm it is still valid.
    a = 'ea1db124af3c7062474693fa704f4ff8'
    wts = int(time.time())
    session_id = "1766149779423829"
    # Pre-encoded query string that gets signed; it lists the same fields as
    # ``params`` below (minus ``w_rid``), so the two must be kept in sync.
    y = f'mode=3&oid={oid}&pagination_str=%7B%22offset%22%3A%22%7B%5C%22type%5C%22%3A1%2C%5C%22direction%5C%22%3A1%2C%5C%22session_id%5C%22%3A%5C%22{session_id}%5C%22%2C%5C%22data%5C%22%3A%7B%7D%7D%22%7D&plat=1&type=1&web_location=1315875&wts={wts}'
    w_rid = md5_to_w_rid(y, a)

    url = "https://api.bilibili.com/x/v2/reply/wbi/main"
    # NOTE(review): the pagination offset below is identical on every call,
    # so repeated calls appear to request the same batch - confirm whether
    # the offset should be advanced from the previous response.
    params = {
        "oid": oid,
        "type": "1",
        "mode": "3",
        "plat": "1",
        "pagination_str": "{\"offset\":\"{\\\"type\\\":1,\\\"direction\\\":1,\\\"session_id\\\":\\\"" + session_id + "\\\",\\\"data\\\":{}}\"}",
        "web_location": "1315875",
        "w_rid": w_rid,
        "wts": wts,
    }

    try:
        # A timeout keeps a stalled connection from blocking forever.
        reply = requests.get(
            url,
            headers=headers,
            cookies=cookies,
            params=params,
            timeout=timeout,
        )
        # Surface HTTP errors directly instead of as a JSON decoding error.
        reply.raise_for_status()
        response = reply.json()

        # ``data`` / ``replies`` may be present but null; treat that the
        # same as missing rather than raising AttributeError.
        comments = (response.get('data') or {}).get('replies') or []
        if not comments:
            print("没有更多评论了。")
            return s, False

        client = HanLPClient('https://www.hanlp.com/api', auth='', language='zh')
        for comment in comments:
            uname = comment['member']['uname']
            content = comment['content']['message']
            emotion = client.sentiment_analysis(content)
            s += 1
            print(f'第{s}楼\n用户名:{uname}\n评论内容:{content}\n\n')
            save_to_csv(s, uname, content, emotion)
        return s, True
    except Exception as e:
        # Deliberate best-effort boundary: any network, parsing, HanLP or
        # file error ends this batch and is reported to the caller as
        # ``success=False``.
        print(f"请求失败: {e}")
        return s, False
if __name__ == "__main__":
    # Script entry point: fetch up to 20 batches of replies for one oid,
    # pausing one second after each successful batch.
    oid = 1556265189
    # Headers and cookies are sent as-is with every request; both are empty
    # in this file.
    headers = dict()
    cookies = dict()

    s = 0  # running count of replies processed, threaded through each call
    for i in range(20):
        s, success = fetch_comments(oid, headers, cookies, s)
        if success:
            time.sleep(1)
            continue
        # Stop at the first batch that reports no replies or an error.
        break
|