splash lua 脚本:
--- Splash entry point: log in to a page protected by an image captcha.
--
-- Flow: load the login page, grab the captcha image as a PNG data URL, post it
-- to the OCR HTTP service through a hidden form/iframe, read the recognised
-- text back via the iframe's `window.name`, then fill in and submit the login
-- form.
--
-- Script arguments (read from `splash.args`):
--   url         (required) address of the login page.
--   login_name  (optional) account name; defaults to "namexxxx".
--   password    (optional) account password; defaults to "pwdxxxx".
--
-- @param splash the Splash scripting object
-- @return table with `png` (screenshot taken after the login click), `image`
--   (the object returned by getImageData()), `code` (captcha text used) and
--   `ok` (value returned by the login JavaScript)
function main(splash)
  -- Helper JavaScript installed on every page load, before the page's own
  -- scripts run.
  assert(splash:autoload([[
var server = 'http://192.168.7.101:8087/';

// Recognised captcha text; stays "0000" until the OCR round trip completes.
var DATA = "0000";
function getCode() { return DATA; }

var imageData = {};
function getImageData() { return imageData; }

// Draw an <img> onto a canvas and return its content as a PNG data URL.
function getBase64Image(img) {
  var canvas = document.createElement("canvas");
  canvas.width = img.width;
  canvas.height = img.height;
  var ctx = canvas.getContext("2d");
  ctx.drawImage(img, 0, 0, img.width, img.height);
  return canvas.toDataURL("image/png");
}

// addEventListener (rather than window.onload = ...) so that a handler the
// page installs itself cannot replace this one.
window.addEventListener('load', function () {
  var img = document.getElementById('checkimg');
  if (!img) {
    // This script is autoloaded on every page, including ones that have no
    // captcha image (e.g. whatever is shown after the login click).
    return;
  }
  imageData.base64 = getBase64Image(img);
  inject(server, function (data) { DATA = data; });
}, false);

// Build a hidden form that posts to `url` with a hidden iframe as its target.
// `fn` receives the iframe's window.name on every load after the first one.
function inject(url, fn) {
  var element = document.createElement('form');
  element.setAttribute('id', 'formId');
  element.setAttribute('action', url);
  element.setAttribute('target', 'iframeId');
  element.setAttribute('method', 'POST');
  // The input needs an id as well as a name: parseCode() finds it with
  // getElementById, while the name is the key of the submitted form field.
  element.innerHTML = '<input type=text id="base64" name="base64" >';
  document.body.appendChild(element);

  var iframe = document.createElement('iframe');
  iframe.setAttribute('id', 'iframeId');
  iframe.style.display = 'none';
  var state = 0;
  iframe.onload = function () {
    if (state === 1) {
      var back = document.getElementById('iframeId').contentWindow.name;
      fn(back);
    } else if (state === 0) {
      state = 1;
      // After 3 s point the iframe at '/' so that its window.name can be
      // read from this page.
      setTimeout(function () { iframe.contentWindow.location = '/'; }, 3000);
    }
  };
  document.body.appendChild(iframe);
}

// Copy the captured captcha image into the hidden form and submit it.
function parseCode() {
  document.getElementById("base64").value = imageData.base64;
  document.getElementById("formId").submit();
}
]]))

  assert(splash:go(splash.args.url))
  assert(splash:wait(1))

  local img = splash:evaljs("getImageData()")
  splash:evaljs("parseCode()")
  -- Must outlast the 3 s timer inside inject() so the answer has arrived.
  assert(splash:wait(4))

  -- Fetch the recognised captcha text.
  local verify_code = splash:evaljs("getCode()")

  local login_name = splash.args.login_name or "namexxxx"
  local password = splash.args.password or "pwdxxxx"

  -- Values are handed over as function arguments instead of being spliced
  -- into JavaScript source, so quotes or newlines in the OCR text cannot
  -- break or inject script.
  local submit_login = splash:jsfunc([[
function (name, pwd, code) {
  document.getElementById("LoginName").value = name;
  document.getElementById("Password").value = pwd;
  document.getElementById("CheckCode").value = code;
  document.querySelector(".__ga__switchTag_loginBtn_001").click();
  return 'ok';
}
]])
  local ok = submit_login(login_name, password, verify_code)
  assert(splash:wait(2))

  return {
    png = splash:png(),
    image = img,
    code = verify_code,
    ok = ok,
  }
end
后台使用 Python,通过 pytesseract 调用 Tesseract 识别验证码:
# -*- coding: utf_8 -*-
# Small OCR back end (Python 2): accepts a form POST whose "base64" field is a
# PNG data URL, runs Tesseract on the image and answers with an HTML page whose
# script stores the recognised text in window.name.
from BaseHTTPServer import BaseHTTPRequestHandler,HTTPServer
from os import curdir, sep
import binascii
import cgi
import json
import logging
import time
import base64
import cStringIO
from urlparse import urlparse, parse_qs

try:
    import pytesseract
    from PIL import Image
except ImportError:
    print('http://www.lfd.uci.edu/~gohlke/pythonlibs/#pil')
    print('http://code.google.com/p/tesseract-ocr/')
    raise SystemExit

PORT_NUMBER = 8087
RES_FILE_DIR = "."

# Prefix produced by canvas.toDataURL("image/png") in the browser.
DATA_URL_PREFIX = "data:image/png;base64,"


def decode_base64(data):
    """Decode base64, padding being optional.

    :param data: Base64 data as an ASCII byte string
    :returns: The decoded byte string.
    """
    data += "=" * ((4 - len(data) % 4) % 4)  # restore stripped padding
    return base64.decodestring(data)


class myHandler(BaseHTTPRequestHandler):
    """Request handler that turns a posted captcha image into text."""

    def do_GET(self):
        # NOTE(review): no response is written for any GET request, so the
        # client just sees the connection close. Kept as in the original;
        # confirm whether "/iframe.html" was meant to serve a page.
        if self.path=="/iframe.html":
            return

    def do_POST(self):
        """OCR the image in the "base64" form field.

        Responds 200 with a <script> that sets window.name to the recognised
        text, 400 when the field cannot be decoded into an image, and 500
        when the OCR result is empty.
        """
        logging.warning(self.headers)
        form = cgi.FieldStorage(
            fp=self.rfile,
            headers=self.headers,
            environ={'REQUEST_METHOD':'POST',
                     'CONTENT_TYPE':self.headers['Content-Type'],
                     })
        imageData = form.getvalue("base64","")
        # Strip the data-URL header only when it is really there, so raw
        # base64 payloads are not truncated.
        if imageData.startswith(DATA_URL_PREFIX):
            imageData = imageData[len(DATA_URL_PREFIX):]

        try:
            imgdata = decode_base64(imageData)
            img = Image.open(cStringIO.StringIO(imgdata))
        except (binascii.Error, IOError):
            # Bad base64 or bytes that are not an image: report a client
            # error instead of dying with a traceback.
            self.send_error(400, "base64 field is not a decodable image")
            return

        # "zhilian" is a config file under /opt/local/share/tessdata/configs
        # that holds the character whitelist.
        vcode = pytesseract.image_to_string(img, lang="eng", config="-psm 6 zhilian")
        vcode = vcode.strip()
        if(len(vcode) > 0):
            # json.dumps yields a valid, quoted JavaScript string literal even
            # if the OCR text contains quotes, backslashes or newlines; "</"
            # is escaped so the text cannot close the <script> element.
            literal = json.dumps(vcode).replace("</", "<\\/")
            retString = '<script>window.name=' + literal + ';</script>'
            print(retString)
            self.send_response(200)
            self.send_header("Content-Type", "text/html; charset=utf-8")
            self.send_header("Access-control-Allow-Origin", "*")
            self.end_headers()
            self.wfile.write(retString)
        else:
            self.send_response(500)
            # Without end_headers() the header block is never terminated.
            self.end_headers()


server = None
try:
    server = HTTPServer(('', PORT_NUMBER), myHandler)
    print('Started httpserver on port  %s' % PORT_NUMBER)
    server.serve_forever()
except KeyboardInterrupt:
    print('^C received, shutting down the web server')
    # server is still None if the interrupt arrived before it was created.
    if server is not None:
        server.socket.close()
本站文章如无特殊说明,均为本站原创,如若转载,请注明出处:爬虫登录,立FLAG - Python技术站