Splash Lua 脚本:

--- Splash entry point: log in to a page protected by an image captcha.
-- The script injects helper JavaScript into the page, captures the captcha
-- image (element id `checkimg`) as a PNG data URL, posts it to an external
-- OCR server through a hidden form + iframe, reads the recognised text back
-- via the iframe's `window.name`, then fills in and submits the login form.
-- @param splash the Splash scripting object; `splash.args.url` is the page to open
-- @return table with `png` (screenshot after login), `image` (captured
--   captcha data), `code` (text returned by the OCR server) and `ok`
--   (the value returned by the form-filling script)
function main(splash)
    -- JavaScript that Splash loads into each page before the page's own scripts.
    splash:autoload([[

var server = 'http://192.168.7.101:8087/';

// Latest OCR answer; stays "0000" until the OCR server has replied.
var DATA = "0000";
function getCode(){
    return DATA;
}

var imageData = {};
function getImageData(){
    return imageData;
}

// Draw an <img> onto a canvas and return its content as a PNG data URL.
function getBase64Image(img) {
    var canvas = document.createElement("canvas");
    canvas.width = img.width;
    canvas.height = img.height;

    var ctx = canvas.getContext("2d");
    ctx.drawImage(img, 0, 0, img.width, img.height);

    return canvas.toDataURL("image/png");
}

// addEventListener (rather than assigning window.onload) so that this handler
// and the page's own onload handler cannot overwrite each other.
window.addEventListener('load', function () {
    var img = document.getElementById('checkimg');
    imageData.base64 = getBase64Image(img);

    inject(server, function(data){
        DATA = data;
    });
});

// Build the hidden form + iframe pair used to POST the image to `url`.
// `fn` receives the iframe's window.name once the iframe has (re)loaded.
function inject(url, fn){
    var element = document.createElement('form');
    element.setAttribute('id', 'formId');
    element.setAttribute('action', url);
    element.setAttribute('target', 'iframeId');
    element.setAttribute('method', 'POST');
    // parseCode() finds this field with getElementById, so it needs an id
    // in addition to the name that is used as the POST field name.
    element.innerHTML = '<input type=text id="base64" name="base64" >';
    document.body.appendChild(element);

    var iframe = document.createElement('iframe');
    iframe.setAttribute('id', 'iframeId');
    // A form's target is matched against the frame *name*, not its id.
    iframe.setAttribute('name', 'iframeId');
    iframe.style.display = 'none';
    var state = 0;
    iframe.onload = function() {
        if(state === 1) {
            var back = document.getElementById('iframeId').contentWindow.name;
            fn(back);
        } else if(state === 0) {
            state = 1;
            // Navigate the iframe back to this page's own origin after 3s so
            // that its window.name can be read from the parent page.
            setTimeout(function(){
                iframe.contentWindow.location = '/';
            }, 3000);
        }
    };
    // iframe.src = url;
    document.body.appendChild(iframe);
}

// Copy the captured image into the hidden form and submit it to the OCR server.
function parseCode() {
    document.getElementById("base64").value = imageData.base64;
    document.getElementById("formId").submit();
}

    ]])

    assert(splash:go(splash.args.url))
    assert(splash:wait(1))
    local img = splash:evaljs("getImageData()")
    splash:evaljs("parseCode()")
    -- Leave time for the POST round trip and the delayed iframe navigation.
    assert(splash:wait(4))

    -- Fetch the captcha text reported by the OCR server.
    local verifyCode = splash:evaljs("getCode()")

    -- The code is handed over as a function argument instead of being spliced
    -- into the script source, so quotes, backslashes or newlines in the OCR
    -- output cannot break (or inject into) the JavaScript.
    local submit_login = splash:jsfunc([[
        function (code) {
            document.getElementById("LoginName").value = "namexxxx";
            document.getElementById("Password").value = "pwdxxxx";
            document.getElementById("CheckCode").value = code;
            document.querySelector(".__ga__switchTag_loginBtn_001").click();
            return 'ok';
        }
    ]])

    local code_text = ""
    if verifyCode ~= nil then
        code_text = tostring(verifyCode)
    end

    local ok = submit_login(code_text)
    assert(splash:wait(2))

    return {
        png = splash:png(),
        image = img,
        code = verifyCode,
        ok = ok,
    }
end

后台使用 Python,调用 Tesseract(通过 pytesseract)解析验证码:

# -*- coding: utf_8 -*-

from BaseHTTPServer import BaseHTTPRequestHandler,HTTPServer
from os import curdir, sep
import cgi
import logging
import time
import base64
import cStringIO
from urlparse import urlparse, parse_qs
try:
    import pytesseract
    from PIL import Image
except ImportError:
    print 'http://www.lfd.uci.edu/~gohlke/pythonlibs/#pil'
    print 'http://code.google.com/p/tesseract-ocr/'
    raise SystemExit

# TCP port the HTTP server below binds to (passed to HTTPServer at start-up).
PORT_NUMBER = 8087
# NOTE(review): not referenced anywhere in the visible code -- presumably a
# leftover resource directory setting; confirm before removing.
RES_FILE_DIR = "."

def decode_base64(data):
    """Decode base64, padding being optional.

    :param data: Base64 data as an ASCII byte string; text strings are
        accepted too and are encoded as ASCII first.
    :returns: The decoded byte string.
    """
    if not isinstance(data, bytes):
        data = data.encode("ascii")

    # The decoder skips whitespace, but counting it would give the wrong
    # amount of padding, so drop it before doing the arithmetic.
    data = b"".join(data.split())
    data += b"=" * (-len(data) % 4)

    # b64decode exists on both Python 2 and 3; decodestring was removed in 3.9.
    return base64.b64decode(data)

class myHandler(BaseHTTPRequestHandler):
    """HTTP handler that OCRs a posted captcha image.

    POST requests carry a form field ``base64`` holding a PNG data URL. The
    image is run through Tesseract and the recognised text is returned inside
    a small ``<script>`` that stores it in ``window.name``, which is how the
    posting page reads the result back from its hidden iframe.
    """

    def _reply(self, status, body=""):
        """Send a complete response: status line, headers and optional HTML body."""
        self.send_response(status)
        self.send_header("Access-control-Allow-Origin", "*")
        self.send_header("Content-Type", "text/html; charset=utf-8")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        if body:
            self.wfile.write(body)

    def do_GET(self):
        """Answer GET requests so that clients never wait on a silent socket."""
        if self.path == "/iframe.html":
            # The original branch was empty; serve an empty page so the
            # client still receives a well-formed response.
            self._reply(200)
            return
        self.send_error(404)

    def do_POST(self):
        """OCR the image in the ``base64`` form field and reply with the text.

        Replies 400 when the field is missing or is not a decodable image,
        500 when Tesseract recognises nothing, 200 with the script otherwise.
        """
        # Local import: json is only needed to build a safe JS string literal.
        import json

        logging.warning(self.headers)

        form = cgi.FieldStorage(
            fp=self.rfile,
            headers=self.headers,
            environ={'REQUEST_METHOD':'POST',
                    'CONTENT_TYPE':self.headers['Content-Type'],
                    })

        imageData = form.getvalue("base64", "")
        if not imageData:
            self._reply(400)
            return

        #imageData = imageData.replace(" ", "+")
        # Drop the "data:image/png;base64," header only when it is really
        # there, instead of blindly slicing a fixed number of characters.
        if imageData.startswith("data:"):
            imageData = imageData.split(",", 1)[-1]

        try:
            imgdata = decode_base64(imageData)
            img = Image.open(cStringIO.StringIO(imgdata))
        except Exception:
            # Bad input must produce an HTTP error, not a dropped connection.
            logging.exception("could not decode the posted captcha image")
            self._reply(400)
            return

        # "zhilian" is a whitelist config located in
        # /opt/local/share/tessdata/configs.
        # NOTE(review): Tesseract 4+ spells the page-segmentation flag
        # "--psm"; confirm against the installed version.
        vcode = pytesseract.image_to_string(img, lang="eng", config="-psm 6 zhilian")
        if not vcode:
            self._reply(500)
            return

        # json.dumps yields a correctly escaped JS string literal; "</" is
        # additionally broken up so the text cannot close the <script> tag.
        literal = json.dumps(vcode).replace("</", "<\\/")
        retString = '<script>window.name=' + literal + ';</script>'
        print(retString)
        self._reply(200, retString)

# Bind before entering the try block so `server` is always defined when the
# clean-up below runs.
server = HTTPServer(('', PORT_NUMBER), myHandler)
try:
    print('Started httpserver on port  %s' % PORT_NUMBER)

    server.serve_forever()

except KeyboardInterrupt:
    print('^C received, shutting down the web server')
finally:
    # server_close() releases the listening socket on every exit path,
    # not only after Ctrl-C.
    server.server_close()