Python爬虫之爬取B站视频(哔哩哔哩)
2021/5/20 22:54:44
本文主要是介绍Python爬虫之爬取B站视频(哔哩哔哩),对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!
代码如下
亲测有效
# encoding: utf-8
"""Download a single Bilibili video page and merge its streams with ffmpeg.

The page HTML embeds two JSON blobs (``__playinfo__`` and
``__INITIAL_STATE__``).  The spider extracts the first DASH video stream URL
and the first DASH audio stream URL from them, downloads both to temporary
files and muxes them into ``download.mp4`` using the ``ffmpeg`` executable,
which must be available on ``PATH``.
"""
import json
import os
import re
import subprocess

import requests  # HTTP client used for the page and the media streams
from requests.exceptions import RequestException

# Default request headers (kept at module level for backward compatibility).
headers = {
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
}

# Seconds to wait for a connection / for the next bytes before giving up.
REQUEST_TIMEOUT = 30
# Size of the pieces written to disk while streaming a media file.
CHUNK_SIZE = 1024 * 1024


class BilibiliVideoSpider(object):
    """Fetch one Bilibili video page and save the video as a merged mp4.

    Args:
        url: Address of the video page.
        output_root: Directory that receives the temporary ``video.mp4`` and
            ``audio.mp4`` files while downloading.
    """

    def __init__(self, url, output_root):
        self.url = url
        self.output_root = output_root
        # Per-instance copy so later changes never touch the module default.
        self.headers = dict(headers)

    def _match(self, text, pattern):
        """Return the JSON value captured by group 1 of *pattern* in *text*.

        Raises:
            ValueError: if *pattern* does not match *text*.
        """
        match = re.search(pattern, text)
        if match is None:
            print('this pattern was not matched !')
            # Fail with a clear error instead of crashing on None.group().
            raise ValueError(f'pattern not found in page: {pattern!r}')
        return json.loads(match.group(1))

    def getHtml(self):
        """Request the video page.

        Returns:
            The response object when the status code is 200, otherwise
            ``None`` (non-200 status or a request error).
        """
        try:
            response = requests.get(url=self.url, headers=self.headers,
                                    timeout=REQUEST_TIMEOUT)
        except RequestException:
            print('html reques error !')
            return None
        print(f'status_code: {response.status_code}')
        if response.status_code == 200:
            return response
        return None

    def parseHtml(self, response):
        """Extract the stream URLs and the title from the page response.

        Returns:
            A ``(video_url, audio_url, video_name)`` tuple.

        Raises:
            ValueError: if one of the embedded JSON blobs cannot be located.
        """
        # JSON describing the playable streams.
        playinfo = self._match(response.text, '__playinfo__=(.*?)</script><script>')
        # JSON describing the page content (title, ...).
        initial_state = self._match(response.text, r'__INITIAL_STATE__=(.*?);\(function\(\)')
        # The first entry of each list is used.  NOTE(review): the original
        # author states the first video entry is the highest resolution
        # (1080p); this code does not verify that.
        video_url = playinfo['data']['dash']['video'][0]['baseUrl']
        audio_url = playinfo['data']['dash']['audio'][0]['baseUrl']
        video_name = initial_state['videoData']['title']
        return video_url, audio_url, video_name

    def _download(self, url, dst_path, request_headers, size_label):
        """Stream *url* into *dst_path*, printing the reported size first."""
        with requests.get(url, headers=request_headers, stream=True,
                          timeout=REQUEST_TIMEOUT) as response:
            response.raise_for_status()
            print(size_label, response.headers.get('content-length'))
            # 'wb' so a leftover file from an aborted run is overwritten
            # rather than appended to.
            with open(dst_path, 'wb') as output:
                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                    output.write(chunk)

    def downloadVideo(self, video_url, audio_url, video_name):
        """Download both streams, merge them and delete the temporary files.

        The merged file is written as ``download.mp4`` next to this script.
        The temporary files are removed only after a successful merge, so a
        failed merge leaves them in place for inspection.

        Raises:
            requests.exceptions.RequestException: if a download fails.
            subprocess.CalledProcessError: if ffmpeg reports an error.
        """
        # Local copy: the Referer must not leak into the shared defaults.
        request_headers = dict(self.headers)
        request_headers['Referer'] = self.url

        if self.output_root:
            os.makedirs(self.output_root, exist_ok=True)
        video = os.path.join(self.output_root, 'video.mp4')
        audio = os.path.join(self.output_root, 'audio.mp4')

        print('开始下载视频: ')
        self._download(video_url, video, request_headers,
                       '%s视频大小:' % video_name)
        self._download(audio_url, audio, request_headers,
                       '%s音频大小:' % video_name)
        print('视频下载完成')

        # The script directory, cut at 'shippingSchedule' when that name is
        # part of the path (kept from the original; it looks like a leftover
        # of the author's own project layout).
        root_path = os.path.abspath(os.path.dirname(__file__)).split('shippingSchedule')[0]
        video_dst = os.path.join(root_path, 'download.mp4')
        self.video_audio_merge(video, audio, video_dst)
        print(f'下载的视频: {video_dst}')
        os.remove(video)
        os.remove(audio)

    def video_audio_merge(self, video_src, audio_src, video_dst):
        """Mux one video file and one audio file into *video_dst* via ffmpeg.

        All three arguments are complete file paths.  The call blocks until
        ffmpeg has finished, so the inputs may be deleted afterwards.

        Raises:
            FileNotFoundError: if the ``ffmpeg`` executable is not found.
            subprocess.CalledProcessError: if ffmpeg exits with an error.
        """
        # Argument list without a shell: paths containing spaces or shell
        # metacharacters are passed through untouched.
        command = [
            'ffmpeg', '-y', '-loglevel', 'error',
            '-i', video_src,
            '-i', audio_src,
            '-c', 'copy',
            video_dst,
        ]
        subprocess.run(command, check=True)

    def run(self):
        """Fetch the page, parse it and download the video.

        Raises:
            RuntimeError: if the page could not be fetched.
        """
        response = self.getHtml()
        if response is None:
            raise RuntimeError(f'failed to fetch page: {self.url}')
        video_url, audio_url, video_name = self.parseHtml(response)
        self.downloadVideo(video_url, audio_url, video_name)


def demo():
    """Download the sample video into the current directory."""
    url = 'https://www.bilibili.com/video/BV1Q5411p7bz?from=search&seid=14643382716113842219'
    output_root = './'
    b = BilibiliVideoSpider(url, output_root)
    b.run()


if __name__ == '__main__':
    demo()
这篇关于Python爬虫之爬取B站视频(哔哩哔哩)的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!
- 2024-12-20Python编程入门指南
- 2024-12-20Python编程基础与进阶
- 2024-12-19Python基础编程教程
- 2024-12-19python 文件的后缀名是什么 怎么运行一个python文件?-icode9专业技术文章分享
- 2024-12-19使用python 把docx转为pdf文件有哪些方法?-icode9专业技术文章分享
- 2024-12-19python怎么更换换pip的源镜像?-icode9专业技术文章分享
- 2024-12-19Python资料:新手入门的全面指南
- 2024-12-19Python股票自动化交易实战入门教程
- 2024-12-19Python股票自动化交易入门教程
- 2024-12-18Python量化入门教程:轻松掌握量化交易基础知识