1、常见的打码平台
云打码:http://www.yundama.com/ (能够解决通用的验证码识别)
极验验证码智能识别辅助:http://jiyandoc.c2567.com/ (能够解决复杂验证码的识别)
2、云打码的使用
2.1 云打码官方接口
下面代码是云打码平台提供,做了个简单修改,实现了两个方法:
- indetify:传入验证码图片响应的二进制内容(例如 response.content)即可识别
- indetify_by_filepath:传入图片的路径即可识别
其中需要自己配置的地方是:
# Settings you must configure yourself before using the Yundama client.
username = 'whoarewe'  # username
password = '*'  # password
appid = 4283  # appid
appkey = '02074c64f0d0bb9efb2dfb01c3'  # appkey
codetype = 1004  # captcha type
云打码官方提供的api如下:
# THE WINTER IS COMING! the old driver will be driving who was a man of the world!
# -*- coding: utf-8 -*-
# python 3.6.7, create time is 18-12-13 9:33 AM GMT+8
"""Client for the Yundama captcha-recognition HTTP API.

Exposes two convenience entry points:

- ``indetify(response_content)``: recognise a captcha from raw image bytes.
- ``indetify_by_filepath(file_path)``: recognise a captcha stored on disk.
"""
import json
import time
from contextlib import ExitStack

import requests


class YDMHttp:
    """Thin wrapper around the Yundama ``api.php`` endpoint."""

    apiurl = 'http://api.yundama.com/api.php'
    username = ''
    password = ''
    appid = ''
    appkey = ''

    def __init__(self, username, password, appid, appkey):
        self.username = username
        self.password = password
        # The API is called with form fields, so the numeric id is sent as text.
        self.appid = str(appid)
        self.appkey = appkey

    def _fields(self, method, **extra):
        """Return the form fields shared by every call, plus ``extra``."""
        fields = {
            'method': method,
            'username': self.username,
            'password': self.password,
            'appid': self.appid,
            'appkey': self.appkey,
        }
        fields.update(extra)
        return fields

    @staticmethod
    def _value_or_error(response, key):
        """Return ``response[key]``, or the negative ``ret`` code on failure.

        Returns -9001 when the response is empty.
        """
        if not response:
            return -9001
        ret = response['ret']
        if ret and ret < 0:
            return ret
        return response[key]

    def request(self, fields, files=None):
        """POST ``fields`` (and optional ``files``) and return the decoded JSON."""
        return json.loads(self.post_url(self.apiurl, fields, files))

    def balance(self):
        """Return the account balance, or a negative error code."""
        response = self.request(self._fields('balance'))
        return self._value_or_error(response, 'balance')

    def login(self):
        """Log in and return the user id, or a negative error code."""
        response = self.request(self._fields('login'))
        return self._value_or_error(response, 'uid')

    def upload(self, filename, codetype, timeout):
        """Upload a captcha and return its ``cid``, or a negative error code.

        ``filename`` is either a path to the image or the raw image bytes.
        """
        data = self._fields(
            'upload', codetype=str(codetype), timeout=str(timeout))
        response = self.request(data, {'file': filename})
        return self._value_or_error(response, 'cid')

    def result(self, cid):
        """Return the recognised text for ``cid``, or '' if not ready yet."""
        response = self.request(self._fields('result', cid=str(cid)))
        if not response:
            return ''
        return response['text'] or ''

    def decode(self, filename, codetype, timeout):
        """Upload a captcha and poll for its text.

        Polls once per second, at most ``timeout`` times. Returns
        ``(cid, text)`` on success, ``(-3003, '')`` when polling runs out, or
        ``(error_code, '')`` when the upload itself failed.
        """
        cid = self.upload(filename, codetype, timeout)
        if cid <= 0:
            return cid, ''
        for _ in range(timeout):
            result = self.result(cid)
            if result != '':
                return cid, result
            time.sleep(1)
        return -3003, ''

    def post_url(self, url, fields, files=None):
        """POST ``fields`` and ``files`` to ``url`` and return the body text.

        ``files`` maps a form field name to either raw image bytes (sent
        as-is) or a filesystem path (opened in binary mode). Opened files are
        closed once the request completes, and the caller's mapping is left
        unmodified.
        """
        with ExitStack() as stack:
            payload = {}
            for key, value in (files or {}).items():
                if isinstance(value, (bytes, bytearray)):
                    # Already the image content, e.g. ``response.content``.
                    payload[key] = value
                else:
                    payload[key] = stack.enter_context(open(value, 'rb'))
            res = requests.post(url, files=payload, data=fields)
        return res.text


# Account settings; replace them with your own before running.
username = 'whoarewe'  # username
password = '*'  # password (masked in the article)
appid = 4283  # appid
appkey = '02074c64f0d0bb9efb2dfb01c3'  # appkey
filename = 'b.jpg'  # file location
codetype = 1004  # captcha type
timeout = 60  # timeout in seconds


def _recognize(image):
    """Log in, print uid and balance, then recognise ``image`` (bytes or path).

    Returns the recognised text, or None when the placeholder username is
    still configured.
    """
    if username == 'username':
        print('请设置好相关参数再测试')
        return None
    # Initialise the client.
    yundama = YDMHttp(username, password, appid, appkey)
    # Log in to Yundama.
    uid = yundama.login()
    print('uid: %s' % uid)
    # Query the balance.
    balance = yundama.balance()
    print('balance: %s' % balance)
    # Recognise: image, captcha type id, timeout in seconds -> (cid, text).
    cid, result = yundama.decode(image, codetype, timeout)
    print('cid: %s, result: %s' % (cid, result))
    return result


def indetify(response_content):
    """Recognise a captcha from the raw bytes of an image response."""
    return _recognize(response_content)


def indetify_by_filepath(file_path):
    """Recognise a captcha image stored at ``file_path``."""
    return _recognize(file_path)


if __name__ == '__main__':
    indetify_by_filepath('b.jpg')
2.2 代码中调用云打码的接口
下面以豆瓣登录过程中的验证码为例,了解云打码如何使用
# coding=utf-8
"""Log in to Douban with Selenium, solving the captcha through Yundama."""
import time

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By

from yundama import indetify

driver = webdriver.Chrome()
try:
    driver.get("https://www.douban.com/")

    # Enter the username.
    driver.find_element(By.ID, "form_email").send_keys("")
    # Enter the password.
    driver.find_element(By.ID, "form_password").send_keys("")

    # Get the captcha image URL.
    img_url = driver.find_element(By.ID, "captcha_image").get_attribute("src")
    response = requests.get(img_url)  # request the captcha image
    ret = indetify(response.content)  # recognise the captcha

    # Enter the captcha.
    driver.find_element(By.ID, "captcha_field").send_keys(ret)
    time.sleep(5)

    # Log in.
    driver.find_element(By.CLASS_NAME, "bn-submit").click()
    time.sleep(10)
    print(driver.get_cookies())
finally:
    # Always close the browser, even when a lookup or request above fails.
    driver.quit()
2.3 若快打码识别
"""Ruokuai captcha recognition: upload an image and return the result."""
import hashlib
from datetime import datetime

import requests

RUOUSER = ''
RUOPASS = ''
# Promotional software id. The value is missing from the article text
# (it read `'softid': ,`), so it must be filled in before use.
RUOSOFTID = ''


# Ruokuai 12306 captcha solving: pass a local file path directly.
def getCode(img):
    """Upload the image at path ``img`` and return the API's ``Result`` field."""
    url = "http://api.ruokuai.com/create.json"
    with open(img, "rb") as image_file:
        fileBytes = image_file.read()
    paramDict = {
        'username': RUOUSER,
        'password': RUOPASS,
        'typeid': 6113,  # type id dedicated to 12306 image captchas
        'timeout': 90,
        'softid': RUOSOFTID,  # used for promotion
        'softkey': '70acaa1e477a4374aa24b974b',  # used for promotion
    }
    paramKeys = ['username', 'password', 'typeid', 'timeout',
                 'softid', 'softkey']
    result = http_upload_image(url, paramKeys, paramDict, fileBytes)
    return result['Result']


# Ruokuai 12306 captcha solving: upload the image.
def http_upload_image(url, paramKeys, paramDict, filebytes):
    """POST the parameters and image as multipart/form-data; return the JSON.

    The body is assembled by hand: one part per key in ``paramKeys`` (in that
    order, values taken from ``paramDict``), then the image part, then the
    closing boundary.
    """
    # The boundary is derived from the current timestamp.
    timestr = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    boundary = ('------------'
                + hashlib.md5(timestr.encode("utf8")).hexdigest().lower())
    separator = ('\r\n--%s\r\n' % boundary).encode('ascii')

    parts = []
    for key in paramKeys:
        param = ("Content-Disposition: form-data; name=\"%s\"\r\n\r\n%s"
                 % (key, paramDict[key]))
        parts.append(separator)
        parts.append(param.encode('utf8'))

    header = ('Content-Disposition: form-data; name=\"image\"; '
              'filename=\"%s\"\r\nContent-Type: image/gif\r\n\r\n' % 'sample')
    parts.append(separator)
    parts.append(header.encode('utf8'))
    parts.append(filebytes)
    parts.append(('\r\n--%s--\r\n' % boundary).encode('ascii'))
    body = b''.join(parts)

    headers = {
        'Content-Type': 'multipart/form-data; boundary=%s' % boundary,
        'Connection': 'Keep-Alive',
        'Expect': '100-continue',
    }
    response = requests.post(url, data=body, headers=headers)
    return response.json()


if __name__ == '__main__':
    # Test
    ret = getCode('../captcha_imgs/45_3.png')
    print(ret)
3、常见的验证码的种类
3.1 url地址不变,验证码不变
这是验证码里面非常简单的一种类型,对应的只需要获取验证码的地址,然后请求,通过打码平台识别即可。
3.2 url地址不变,验证码变化
这种验证码的类型是更加常见的一种类型,对于这种验证码,大家需要思考:在登录过程中,服务器是如何判断我们提交的验证码,就是之前请求到并显示在页面上的那一个验证码的?
很明显,就是通过cookie来实现的,所以对应的,在请求页面、请求验证码、提交验证码的时候需要保证cookie的一致性,对此可以使用requests.session来解决。今天的文章《十七、打码平台的使用》分享到此就结束了,感谢您的阅读。
版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。
如需转载请保留出处:https://bianchenghao.cn/bian-cheng-ji-chu/83735.html