数据采集之超级鹰验证码识别及模拟登录
使用超级鹰完成验证码识别及模拟登录
本博文以古诗文网为爬取实例；若网址发生变化，只需修改代码中对应的网址即可。
from lxml import etree
import requests
from chaojiying import Chaojiying_Client
# Browser-like User-Agent sent with every request made by this script.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
}
# A single session is reused so cookies received from the login page are
# sent again when the captcha image is requested.
session = requests.session()

# 1. Fetch the login page and locate the captcha image, in preparation for
#    downloading it.
url = "https://so.gushiwen.cn/user/login.aspx"
# A timeout keeps the script from hanging forever on a stalled connection.
resp = session.get(url, headers=headers, timeout=10)
# Fail fast on an HTTP error instead of parsing an error page and hitting an
# opaque IndexError in the XPath lookups below.
resp.raise_for_status()
html = etree.HTML(resp.text)

# Hidden form fields read from the login page.
# NOTE(review): presumably posted back with the login form later on; they are
# not used in the part of the script visible here.
viewstate = html.xpath("//input[@id='__VIEWSTATE']/@value")[0]
viewstategenerator = html.xpath("//input[@id='__VIEWSTATEGENERATOR']/@value")[0]

# The captcha <img> src is appended to the site root to form the full URL.
baseurl = "https://so.gushiwen.cn"
codeurl = baseurl + html.xpath("//img[@id='imgCode']/@src")[0]
# 2. Download the captcha image through the same session and keep a copy on
#    disk as code.png.
resp1 = session.get(codeurl, headers=headers, timeout=10)
# Do not submit an HTTP error page to the recognition service.
resp1.raise_for_status()
# The image bytes are already in memory, so they are used directly instead of
# being re-read from code.png (the original re-read left a file handle open).
img = resp1.content
with open("code.png", "wb") as fp:
    fp.write(img)

# 3. Create the Chaojiying recognition client.
# NOTE(review): account credentials are hard-coded here; consider loading
# them from configuration or environment variables.
chao = Chaojiying_Client(username="sdutlss", password="sdutlss", soft_id=933175)

# 4. Submit the image for recognition.
# NOTE(review): 1902 is the code type passed to PostPic; confirm it matches
# this captcha against the Chaojiying price/type table.
chaodict = chao.PostPic(img, 1902)
# The recognised text is read from the "pic_str" entry of the result.
textcode = chaodict["pic_str"]
print(textcode)