基于 requests 模块的 B站评论本地化

首先是用到的一些库:
import os
import re
import tkinter as tk
from tkinter import filedialog

import pandas as pd
import requests
from openpyxl import Workbook
from tqdm import tqdm
设置UA伪装
# Request headers shared by every HTTP call in this script. The User-Agent
# is that of a desktop Chrome browser (UA spoofing).
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/93.0.4577.63 Safari/537.36"
    ),
}
然后获取用户自定义输入的BV号并作判断
def is_valid_bvid(bvid):
    """Return True when *bvid* has the shape of a BV id.

    A valid id is the literal prefix ``BV`` followed by exactly ten ASCII
    letters or digits, with nothing before or after it.

    Args:
        bvid: Candidate id as a string.

    Returns:
        bool: Whether the whole string matches the BV id pattern.
    """
    # fullmatch instead of match + "$": "$" also matches right before a
    # trailing newline, so "BV1rV4y127du\n" used to be accepted.
    return re.fullmatch(r"BV[0-9A-Za-z]{10}", bvid) is not None


# Keep prompting until the user enters a well-formed BV id.
while True:
    # strip() so whitespace picked up while pasting does not fail validation
    # (and does not end up in the id used for later requests).
    bvid = input("请输入BV号").strip()
    if is_valid_bvid(bvid):
        break
    print("错误:无效的BV号,请重新输入。")
把Bv号转换成av号
使用https://api.bilibili.com/x/web-interface/view?bvid=BV 来获取
以 BV1rV4y127du 为例 :https://api.bilibili.com/x/web-interface/view?bvid=BV1rV4y127du
下面是json的图片
返回的 JSON 中,aid 字段即为转换后的 av 号,把它取出来:
def get_aid_from_bvid(bvid):
    """Look up the av id (``aid``) that corresponds to a BV id.

    Calls the ``x/web-interface/view`` endpoint and returns the ``aid``
    field found under ``data`` in the JSON reply.

    Args:
        bvid: BV id of the video, e.g. ``"BV1rV4y127du"``.

    Returns:
        The value stored at ``data.aid`` in the response.

    Raises:
        ValueError: If the reply's ``code`` field is not 0.
    """
    url = "https://api.bilibili.com/x/web-interface/view"
    # Let requests build and encode the query string: the previous f-string
    # URL carried a stray space right after the BV id.
    # The timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(
        url,
        params={"bvid": bvid},
        headers=headers,
        timeout=10,
    )
    json_data = response.json()
    if json_data.get("code") != 0:
        raise ValueError(f"Error getting AV number from BV number {bvid} ")
    return json_data["data"]["aid"]
使用 https://api.bilibili.com/x/v2/reply?type=1&oid=avid&pn=page 来获取对应信息,并进行可视化以及本地化存储
def _fetch_reply_page(avid, page):
    """Download one page of the reply listing for *avid* and return the decoded JSON."""
    url = "https://api.bilibili.com/x/v2/reply?type=1&oid={avid}&pn={page}".format(
        avid=avid, page=page
    )
    # The timeout keeps a stalled connection from hanging the whole download.
    return requests.get(url, headers=headers, timeout=10).json()


def _reply_to_comment(reply):
    """Extract the exported fields from one raw reply object."""
    return {
        "user": reply["member"]["uname"],
        "content": reply["content"]["message"],
        "created_time": pd.to_datetime(reply["ctime"], unit="s"),
        "like": reply["like"],
    }


def get_video_comments(bvid):
    """Collect the comments of a video, page by page.

    The BV id is first converted to an av id, then the reply endpoint is
    walked from page 1 to the last page computed from the ``count`` and
    ``size`` values reported under ``data.page`` on the first page. Only the
    entries of ``data.replies`` are read; anything nested inside a reply is
    not traversed.

    Args:
        bvid: BV id of the video.

    Returns:
        list[dict]: One dict per reply with the keys ``user``, ``content``,
        ``created_time`` (converted with ``pd.to_datetime(..., unit="s")``)
        and ``like``. If a page answers with a non-zero ``code``, an error
        line is printed and the comments gathered so far are returned.
    """
    avid = get_aid_from_bvid(bvid)
    comments_data = []

    first_page = _fetch_reply_page(avid, 1)
    if first_page.get("code") != 0:
        # Previously the paging fields were read without checking the status,
        # which failed with an unrelated KeyError/TypeError.
        print(f"Error retrieving comments from page {1} ")
        return comments_data

    # "or {}" guards against keys that are present but hold a JSON null:
    # dict.get only falls back to its default when the key is absent.
    page_info = (first_page.get("data") or {}).get("page") or {}
    total_comments = page_info.get("count") or 0
    comments_per_page = page_info.get("size") or 0
    if comments_per_page > 0:
        # Ceiling division: a partially filled last page still counts.
        total_pages = (total_comments + comments_per_page - 1) // comments_per_page
    else:
        # No usable page size (would divide by zero); fall back to the page
        # that has already been downloaded.
        total_pages = 1

    for page in tqdm(range(1, total_pages + 1), desc="Downloading comments"):
        # Page 1 was fetched above to read the paging info; reuse it instead
        # of requesting it a second time.
        json_data = first_page if page == 1 else _fetch_reply_page(avid, page)
        if json_data.get("code") != 0:
            print(f"Error retrieving comments from page {page} ")
            break

        # A "replies" key holding null would make .get("replies", []) return
        # None and the iteration raise TypeError; normalise it to [].
        replies = (json_data.get("data") or {}).get("replies") or []
        comments_data.extend(_reply_to_comment(reply) for reply in replies)

    return comments_data
运行
# Download the comments for the BV id entered by the user.
comments = get_video_comments(bvid=bvid)
使用UI界面进行本地化存储
# A Tk root must exist before a dialog can be shown; it is hidden right away
# so that only the "save as" window is visible.
root = tk.Tk()
root.withdraw()

# Suggested file name shown in the dialog.
default_filename = f"{bvid} _评论.xlsx"

options = {
    "defaultextension": ".xlsx",
    "initialfile": default_filename,
    "filetypes": [("Excel files", ".xlsx"), ("All files", ".*")],
    "title": "保存为",
}

try:
    output_file_path = filedialog.asksaveasfilename(**options)
finally:
    # The hidden root used to be left alive after the dialog closed; destroy
    # it so no Tk window is left behind.
    root.destroy()

# An empty result means the dialog was dismissed without choosing a file.
if output_file_path:
    # NOTE(review): save_comments_to_excel is not defined in the visible part
    # of this file — confirm it is provided elsewhere before running.
    save_comments_to_excel(comments, output_file_path)
    print(f"评论数据已保存到文件:{output_file_path} ")
else:
    print("未选择文件。操作取消。")
ok 这样就拿到数据了
下面是一段表格数据
| 序号 | 用户名 | 评论 | 评论时间 |
| --- | --- | --- | --- |
| 1 | 笔锋微草 | 不一样,TS那个组排是粉丝见面会互相娱乐,猴子那个几乎每晚都打相当于日常训练了 | 2023-06-10 6:20:51 |
| 2 | DlackMheep | 职业选手强于普通高手的区别就在于纪律性和团队配合。更何况这场就是娱乐性质的,你换个有奖池的试试,差距就会体现出来。 | 2023-06-10 3:41:25 |
| 3 | Slacker3 | 大B哥不是退役了 只是没人要 | 2023-06-10 2:22:16 |
| 4 | 执笔绣江山 | 我不爱看呜呜渣渣的猴子直播 | 2023-06-09 17:39:15 |
OK啦!!