目标站点 有声小说-有声读物-电台广播在线听书-蜻蜓FM
这次 Python 爬虫实战针对的是 PC 端,移动端可能有更方便的接口,欢迎大家留意讨论。反正是练手,就随便抓一下 PC 端的包好了。
主要内容
这个例子的登录非常简单,没有任何加密,直接 POST 就行,真的是一点加密和未知参数都没有。
私信小编01即可获取大量Python学习资源
Python 实现(注意:此处是类的一个方法,不完整,无法直接运行):
def login(self, user_id, password):
    """Log in to Qingting FM and cache the returned credentials on ``self``.

    Posts the account form to ``self.login_url`` through ``self.session``.
    When the reply carries ``errorno == 0``, the ``qingting_id`` and
    ``access_token`` found in its ``data`` object are stored as attributes;
    otherwise the server-supplied ``errormsg`` is printed.  Replies whose
    HTTP status is not 200 are ignored.

    Args:
        user_id: Account identifier sent as the ``user_id`` form field.
        password: Password sent, unencrypted, as the ``password`` form field.
    """
    data = {
        'account_type': '5',
        'device_id': 'web',
        'user_id': user_id,
        'password': password,
    }
    response = self.session.post(self.login_url, data=data)
    if response.status_code == 200:
        temp = response.json()
        errorno = temp['errorno']
        errormsg = temp['errormsg']
        if errorno == 0:
            print('login successful!', '登录成功!')
            data = temp['data']
            self.qingting_id = data['qingting_id']
            self.access_token = data['access_token']
        else:
            print('Login failed', '登录失败')
            print(errormsg)
登录成功后我们把 access_token 和 qingting_id 拿到,相当于一个登录后的标志;如果账号是会员,就相当于一个会员标志。
音频的真实地址请求了这样一个url:
https://audio.qingting.fm/audiostream/redirect/294280/11604885
其中294280是专辑id,
11604885是当前音频的id
其中还带了一些参数,比如 access_token、qingting_id(登录成功的 response 中有,上图没有登录,所以是空的),另外还有一些,比如 t 是时间戳,
device_id=MOBILESITE(不变)
关键就在于sign(尝试过不加sign会返回一个签名错误)
可以通过全局搜索试一下是哪个 js 生成的这个 sign,我全局搜索了一下
device_id
在 main.一大堆.js 中找到了生成 sign 的函数(需要自己分辨一下,是带有 device_id: "MOBILESITE" 的那个)
搜索其他关键字应该也是可以顺利找到的
这里的sign是u这个变量它是由c这个变量通过一堆加密处理得到的
我们可以控制台输出一下u和c
所以我们就知道了sign实际是加密了请求的其他参数
一开始我误以为是单纯的MD5所以卡了好久(还进入函数内部看他具体是怎么实现的看的一头雾水)
其实代码已经告诉用的是
createHmac("md5", "fpMn12&38f_2e")
查了下Hmac发现就是一种现成的算法,还有不同的模式MD5是其中一种,需要一个秘钥
这里什么都告诉你了,用Hmac-md5秘钥是fpMn12&38f_2e
找个在线加密的网站试了下,果然和刚才控制台输出的一样
python的话需要import
hmac这个库
# Stand-alone demo: build a signed audio-stream URL for a single program.
import hmac
import time

base_url = "https://audio.qingting.fm"
bookid = "294280"
audio_id = "11590788"
# Left empty here, i.e. the request is made without a logged-in account.
access_token = ""
qingting_id = ""
# Millisecond timestamp sent as the ``t`` query parameter.
timestamp = str(round(time.time() * 1000))
# The signature covers the path plus the query string, without the host.
data = f"/audiostream/redirect/{bookid}/{audio_id}?access_token={access_token}&device_id=MOBILESITE&qingting_id={qingting_id}&t={timestamp}"
message = data.encode('utf-8')
key = "fpMn12&38f_2e".encode('utf-8')
# HMAC-MD5 hex digest of the message, appended as the ``sign`` parameter.
sign = hmac.new(key, message, digestmod='MD5').hexdigest()
whole_url = base_url + data + "&sign=" + sign
print(whole_url)
得到一个音频已经可以做到了,剩下的就是得到一堆了,其实我们只要得到每个音频的 id 就可以了
我请求的是这个接口
info_api = f'https://i.qingting.fm/capi/channel/{self.bookid}/programs/{self.version}?curpage={str(page)}&pagesize=30&order=asc'
version 在有声书主页的源代码中,只要改 curpage 就可以翻页了。完整源码:
import requests
import re
import hmac
import time
from tqdm import tqdm
from bs4 import BeautifulSoup
import os
import json
import sys
import urllib3
# Requests in this script are sent with verify=False; call (not merely
# reference) disable_warnings so urllib3's warnings are actually silenced.
urllib3.disable_warnings()
class QingTing:
    """Downloader for the programs of one Qingting FM channel (audiobook).

    The instance keeps a ``requests`` session, optionally logs in to obtain
    ``qingting_id`` / ``access_token``, lists the channel's programs page by
    page, builds an HMAC-MD5 signed stream URL for each program and saves the
    audio into a directory named after the book.
    """

    # Number of programs requested per page from the program-list API.
    PAGE_SIZE = 30

    def __init__(self, user_id, password, bookurl, ifLogin):
        """Store the credentials and derive the channel id from ``bookurl``.

        Args:
            user_id: Account identifier used by :meth:`login`.
            password: Account password used by :meth:`login`.
            bookurl: Channel home page, e.g.
                ``https://www.qingting.fm/channels/257790``.
            ifLogin: Truthy to log in before downloading (see :meth:`run`).
        """
        self.ifLogin = ifLogin
        self.user_id = user_id
        self.password = password
        self.session = requests.session()
        self.session.headers.update({'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'})
        self.login_url = "https://u2.qingting.fm/u2/api/v4/user/login"
        self.qingting_id = ''
        self.access_token = ''
        self.bookurl = bookurl
        # Prefer the digits after "/channels/" so a trailing slash or query
        # string does not end up in the id; otherwise fall back to the last
        # path segment.
        channel = re.search(r'/channels/(\d+)', self.bookurl)
        if channel:
            self.bookid = channel.group(1)
        else:
            self.bookid = self.bookurl.rstrip('/').split('/')[-1]
        self.version = ''
        self.qingtinghost = 'https://audio.qingting.fm'
        self.save_path = ''
        self.bookname = ''

    @staticmethod
    def _count_pages(total, page_size=PAGE_SIZE):
        """Return how many pages of ``page_size`` items hold ``total`` items."""
        # Ceiling division: an exact multiple must not produce an extra page.
        return -(-int(total) // page_size)

    def _programs_url(self, page):
        """Return the program-list API URL for the 1-based ``page``."""
        return f'https://i.qingting.fm/capi/channel/{self.bookid}/programs/{self.version}?curpage={str(page)}&pagesize={self.PAGE_SIZE}&order=asc'

    def login(self, user_id, password):
        """Log in and keep the returned ``qingting_id`` and ``access_token``.

        On an application-level failure (``errorno`` other than 0) the
        server's message is printed and, after a 10 second pause, the process
        exits with status 0.  A non-200 HTTP reply is ignored, leaving the
        instance without credentials.
        """
        data = {
            'account_type': '5',
            'device_id': 'web',
            'user_id': user_id,
            'password': password,
        }
        response = self.session.post(self.login_url, data=data, verify=False)
        if response.status_code == 200:
            temp = response.json()
            errorno = temp['errorno']
            errormsg = temp['errormsg']
            if errorno == 0:
                print('login successful!', '登录成功!')
                data = temp['data']
                self.qingting_id = data['qingting_id']
                self.access_token = data['access_token']
            else:
                print('Login failed', '登录失败')
                print(errormsg)
                # Pause before exiting so the message can be read.
                time.sleep(10)
                sys.exit(0)

    def __get_version(self):
        """Scrape the channel page for the book title and the ``version`` token.

        Sets ``self.bookname`` (with characters that are unsafe in file names
        replaced by spaces), creates a directory of that name, and sets
        ``self.version`` when the token is found in the page source.
        """
        response = self.session.get(url=self.bookurl, verify=False)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'lxml')
            temp_bookname = soup.select('div.album-info-root > div.top > div.info.right > h1')[0].string
            # Replace \ / : * ? " < > | so the title can be used as a directory name.
            self.bookname = re.sub(r'[\\/:*?"<>|]', ' ', temp_bookname)
            os.makedirs(self.bookname, exist_ok=True)
            matched = re.search(r'"version":"(\w+)"', response.text, re.S)
            if matched:
                self.version = matched.group(1)

    def __get_total_page(self):
        """Return ``(total, total_page)`` for the channel's program list.

        Raises:
            RuntimeError: If the program-list request does not return HTTP 200.
        """
        self.__get_version()
        response = self.session.get(self._programs_url(1), verify=False)
        if response.status_code != 200:
            raise RuntimeError(
                'program list request failed with HTTP status {}'.format(response.status_code)
            )
        total = int(response.json()['data']['total'])
        return total, self._count_pages(total)

    def get_book_info(self):
        """Yield every program dict of the channel, page by page, in ascending order."""
        total, total_page = self.__get_total_page()
        print(self.bookname, '共{}集'.format(total))
        for page in range(1, total_page + 1):
            response = self.session.get(self._programs_url(page), verify=False)
            yield from response.json()['data']['programs']

    def get_src(self, id):
        """Return the signed stream URL for the program with the given ``id``.

        The ``sign`` parameter is the HMAC-MD5 hex digest of the path and
        query string (without the host).
        """
        # Millisecond timestamp sent as the ``t`` query parameter.
        timestamp = str(round(time.time() * 1000))
        data = f"/audiostream/redirect/{self.bookid}/{id}?access_token={self.access_token}&device_id=MOBILESITE&qingting_id={self.qingting_id}&t={timestamp}"
        key = "fpMn12&38f_2e".encode('utf-8')
        sign = hmac.new(key, data.encode('utf-8'), digestmod='MD5').hexdigest()
        return self.qingtinghost + data + "&sign=" + sign

    def downloadFILE(self, url, name):
        """Stream ``url`` into the file ``name``, showing a progress bar.

        If the reply is not ``audio/mpeg`` it is parsed as a JSON error and
        its ``errorno`` / ``errormsg`` are printed instead.
        """
        resp = self.session.get(url=url, stream=True, verify=False)
        if resp.headers['Content-Type'] == 'audio/mpeg':
            # Size in KiB, matching the 1024-byte chunks fed to the progress bar.
            content_size = int(resp.headers['Content-Length']) // 1024
            with open(name, "wb") as f:
                print("Pkg total size is:", content_size, 'k,start...')
                for data in tqdm(iterable=resp.iter_content(1024), total=content_size, unit='k', desc=name):
                    f.write(data)
            print(name, "download finished!")
        else:
            # Parse the error body once instead of once per field.
            error = resp.json()
            errorno = error['errorno']
            errormsg = error['errormsg']
            print('没有权限下载,请登录已购此音频的账号。')
            print('errorno:', errorno, errormsg)

    def run(self):
        """Log in if requested, then download every program of the channel.

        A failure on one program is printed and appended to ``log.txt``; the
        remaining programs are still attempted.
        """
        if self.ifLogin:
            self.login(self.user_id, self.password)
        for count, program in enumerate(self.get_book_info(), start=1):
            try:
                # Zero-padded running number keeps the files in play order.
                title = str(count).zfill(4) + ' ' + program['title'] + '.m4a'
                if self.bookname:
                    title = os.path.join(self.bookname, title)
                whole_url = self.get_src(program['id'])
                self.downloadFILE(whole_url, title)
            except Exception as e:
                # Best effort: record the failure and move on to the next program.
                print(e)
                with open('log.txt', 'a', encoding='utf-8') as f:
                    f.write(str(count) + str(e) + '\n')
def get_config_info():
    """Return the parsed contents of ``config.json`` in the working directory.

    The file is read as UTF-8 JSON.  The caller in this script reads the keys
    ``ifLogin``, ``user_id`` and ``password`` from the result.
    """
    with open('config.json', 'r', encoding='utf-8') as f:
        config = json.load(f)
    return config
if __name__ == "__main__":
    # Build a single-file executable with:
    # pyinstaller -F -i ico.ico QingTingFM.py
    config = get_config_info()
    bookurl = input('请输入要下载音频的主页链接:(如https://www.qingting.fm/channels/257790)')
    isvalid = re.search(r'https://www\.qingting\.fm/channels/\d+', bookurl)
    if isvalid:
        # Hand over only the matched URL so stray text around it (spaces, a
        # trailing slash, a query string) cannot corrupt the channel id.
        # The last argument is the login flag: 1 to log in, 0 to stay anonymous.
        q = QingTing(config["user_id"], config["password"], isvalid.group(0), 1 if config["ifLogin"] else 0)
        q.run()
    else:
        print("输入的主页格式错误")
配置文件部分
{
"ifLogin":1,
"user_id":"135########",
"password":"pwd########"
}