# A very rough little crawler for a photo-gallery site; to be improved when time permits.

from selenium import webdriver
import re
import requests
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from pyquery import PyQuery as pq
from selenium.webdriver.firefox.options import Options

# Gallery page to scrape; login() below fills in the sign-in form served at this address.
url = 'http://www.tujidao.com/a/?id=25309'

# PhantomJS service arguments: do not load images, do not use the disk cache.
# Only referenced by the disabled PhantomJS setup further down.
PhantomJS_conf = ['--load-images=false','--disk-cache=false']

# Run Firefox without opening a visible window.
options = webdriver.FirefoxOptions()
options.add_argument('-headless')
# `options=` replaces the deprecated `firefox_options=` keyword, which
# Selenium 4 no longer accepts.
browser = webdriver.Firefox(options=options)

# Alternative PhantomJS backend (disabled):
# browser = webdriver.PhantomJS(service_args=PhantomJS_conf)
# browser.set_window_size(1400,900)                               # set the browser window size

# Shared explicit wait: each wait.until(...) call polls for up to 10 seconds.
wait = WebDriverWait(browser,10)

def login(username=0, password=0):
    """Submit the sign-in form served at ``url`` and return the page HTML.

    The module-level ``browser`` loads ``url``, the account and password
    fields are filled in, the login button is clicked, and ``url`` is then
    loaded a second time.

    Args:
        username: Value typed into the account field. Defaults to ``0``,
            the placeholder the script previously hard-coded via ``int()``.
        password: Value typed into the password field. Defaults to ``0``
            for the same reason.

    Returns:
        ``browser.page_source`` after ``url`` has been reloaded.

    Raises:
        TimeoutException: If a form field or the login button does not
            become available within the shared ``wait`` timeout.
    """
    browser.get(url)

    # Account input field.
    user_field = wait.until(EC.presence_of_element_located(
        (By.CSS_SELECTOR,
         'div.layui-form-item:nth-child(1) > div:nth-child(2) > input:nth-child(1)')))
    # Password input field.
    pass_field = wait.until(EC.presence_of_element_located(
        (By.CSS_SELECTOR,
         'div.layui-form-item:nth-child(2) > div:nth-child(2) > input:nth-child(1)')))
    # Login button: wait until it can actually be clicked, not merely until
    # it exists in the DOM.
    login_button = wait.until(EC.element_to_be_clickable(
        (By.CSS_SELECTOR, '.layui-btn')))

    user_field.send_keys(username)
    pass_field.send_keys(password)
    login_button.click()

    # NOTE(review): nothing waits for the login request to finish before the
    # page is reloaded; confirm this is reliable against the live site.
    browser.get(url)
    return browser.page_source

def get_image():
    """Log in, parse the returned HTML and select the gallery images.

    Returns:
        The PyQuery selection matching ``#kbox img`` in the page that
        ``login()`` returns.
    """
    # Parse the logged-in page and pick every <img> under the #kbox element.
    return pq(login())('#kbox img')

def register():
    """Collect the ``data-src`` values of the images found by ``get_image()``.

    Returns:
        A list with the captured ``data-src`` string of every ``<img>`` tag
        matched in the stringified selection.
    """
    markup = str(get_image())
    # re.S lets '.' span newlines inside the serialized markup.
    data_src_pattern = re.compile('<img.*?data-src="(.*?)"/>', re.S)
    return data_src_pattern.findall(markup)

# Download every image URL found by register() and save them as
# a0.jpg, a1.jpg, ... in the test folder on the desktop.
# The trailing 'a' is a file-name prefix, not a directory.
save_prefix = r'C:\Users\admin\Desktop\test\a'

count = 0
try:
    for i in register():
        # A timeout keeps one stalled download from hanging the whole script.
        response = requests.get(i, timeout=30)
        if response.ok:
            with open('{}{}.jpg'.format(save_prefix, count), mode='wb') as f:
                f.write(response.content)
        else:
            # Do not store an HTTP error page under a .jpg name.
            print('skipped {} (HTTP {})'.format(i, response.status_code))
        # The index advances for every URL so file numbers keep matching
        # the image's position on the page.
        count += 1
finally:
    # Always shut down the headless Firefox process, even after a failure.
    browser.quit()