• 什么是Selenium

    selenium 是一套完整的web应用程序测试系统,包含了测试的录制(selenium IDE),编写及运行(Selenium Remote Control)和测试的并行处理(Selenium Grid)。

      selenium的核心Selenium Core基于JsUnit,完全由JavaScript编写,因此可以用于任何支持JavaScript的浏览器上。

      selenium可以模拟真实浏览器,自动化测试工具,支持多种浏览器,爬虫中主要用来解决JavaScript渲染问题。

  • 基本使用

 用python写爬虫的时候,主要用的是selenium的Webdriver,我们可以通过下面的方式先看看Selenium.Webdriver支持哪些浏览器

  1. 基本用法:

    #打开google浏览器,再打开百度,输入Python然后按回车
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait
    
    chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"
    browser = webdriver.Chrome(chromedriver)
    try:
        browser.get('https://www.baidu.com')
        input = browser.find_element_by_id('kw')  # 找到id为kw的元素
        input.send_keys('Python')  # 敲入Python
        input.send_keys(Keys.ENTER)  # 敲入回车
        wait = WebDriverWait(browser, 10)
        wait.until(EC.presence_of_element_located((By.ID, 'content_left')))  # 等待某个元素加载出来
        print(browser.current_url)
        print(browser.get_cookies())
        print(browser.page_source)  # 网页源代码
    finally:
        browser.close()

    https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=0&rsv_idx=1&tn=baidu&wd=Python&rsv_pq=aa95af1b0000f45e&rsv_t=53e2qpjWA%2FivTq6GgdT4FAhWM%2FWfkPZhHYkLoaS7MVPQUZCHF%2FkLQV2%2Brnc&rqlang=cn&rsv_enter=1&rsv_sug3=6&rsv_sug2=0&inputT=111&rsv_sug4=112
    [{'domain': '.baidu.com', 'httpOnly': False, 'name': 'H_PS_PSSID', 'path': '/', 'secure': False, 'value': '1464_21121_26922_22159'}, {'domain': '.baidu.com', 'expiry': 3681286522.89887, 'httpOnly': False, 'name': 'BAIDUID', 'path': '/', 'secure': False, 'value': '410FC9CBBC1B798C0BEF149D5C0BD4E4:FG=1'}, {'domain': '.baidu.com', 'expiry': 3681286522.899111, 'httpOnly': False, 'name': 'BIDUPSID', 'path': '/', 'secure': False, 'value': '410FC9CBBC1B798C0BEF149D5C0BD4E4'}, {'domain': '.baidu.com', 'expiry': 3681286522.899241, 'httpOnly': False, 'name': 'PSTM', 'path': '/', 'secure': False, 'value': '1533802877'}, {'domain': '.baidu.com', 'httpOnly': False, 'name': 'PSINO', 'path': '/', 'secure': False, 'value': '7'}, {'domain': 'www.baidu.com', 'expiry': 2479882880.172246, 'httpOnly': False, 'name': 'delPer', 'path': '/', 'secure': False, 'value': '0'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'name': 'BD_HOME', 'path': '/', 'secure': False, 'value': '0'}, {'domain': 'www.baidu.com', 'expiry': 1534666877, 'httpOnly': False, 'name': 'BD_UPN', 'path': '/', 'secure': False, 'value': '12314753'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'name': 'BD_CK_SAM', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.baidu.com', 'expiry': 1533805472, 'httpOnly': False, 'name': 'H_PS_645EC', 'path': '/', 'secure': False, 'value': 'c9083HSTzGdEsBVBx%2FDjhaEep8Lu5MHd8KusVOaaun2nj5W%2Bjur8%2BSHut%2BM'}]
    <!DOCTYPE html><!--STATUS OK--><html xmlns="http://www.w3.org/1999/xhtml"><head><script charset="utf-8" async="" src="https://ss0.bdstatic.com/-0U0bnSm1A5BphGlnYG/tam-ogel/5d4e9b24-dcc5-483a-b6da-be1e9e621891.js"></script>

    获得的结果

  • 声明浏览器对象(上面我们知道了selenium支持很多的浏览器,但是如果想要声明并调用浏览器则需要:)

    from selenium import webdriverbrowser = webdriver.Chrome()browser = webdriver.Firefox()browser = webdriver.Edge()browser = webdriver.PhantomJS()browser = webdriver.Safari()
  • 访问页面

    from selenium import webdriverchromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"browser = webdriver.Chrome(chromedriver)browser.get('https://www.taobao.com')print(browser.page_source)browser.close()

    <!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml" lang="zh-CN" class="ks-webkit537 ks-webkit ks-chrome68 ks-chrome"><head><script>/*! 2018-08-07 14:58:00 v8.5.7 */
    !function(e){function i(n){if(o[n])return o[n].exports;var r=o[n]={exports:{},id:n,loaded:!1};return e[n].call(r.exports,r,r.exports,i),r.loaded=!0,r.exports}var o={};return i.m=e,i.c=o,i.p="",i(0)}([function(e,i){"use strict";var o=window,n=document;!function(){var e=2,r="ali_analytics";if(o[r]&amp;&amp;o[r].ua&amp;&amp;e&lt;=o[r].ua.version)return void(i.info=o[r].ua);var t,a,d,s,c,u,h,l,m,b,f,v,p,w,g,x,z,O=o.navigator,k=O.appVersion,T=O&amp;&amp;O.userAgent||"",y=function(e){var i=0;return parseFloat(e.replace(/\./g,function(){return 0===i++?".":""}))},_=function(e,i){var o,n;i[o="trident"]=.1,(n=e.match(/Trident\/([\d.]*)/))&amp;&amp;n[1]&amp;&amp;(i[o]=y(n[1])),i.core=o},N=function(e){var i,o;return(i=e.match(/MSIE ([^;]*)|Trident.*; rv(?:\s|:)?([0-9.]+)/))&amp;&amp;(o=i[1]||i[2])?y(o):0},P=function(e){return e||"other"},M=function(e){function i(){for(var i=[["Windows NT 5.1","winXP"],["Windows NT 6.1","win7"],["Windows NT 6.0","winVista"],["Windows NT 6.2","win8"],["Windows NT 10.0","win10"],["iPad","ios"],["iPhone;","ios"],["iPod","ios"],["Macintosh","mac"],["Android","android"],["Ubuntu","ubuntu"],["Linux","linux"],["Windows NT 5.2","win2003"],["Windows NT 5.0","win2000"],["Windows","winOther"],["rhino","rhino"]],o=0,n=i.length;o&lt;n;++o)if(e.indexOf(i[o][0])!==-1)return i[o][1];return"other"}function r(e,i,n,r){var t,a=o.navigator.mimeTypes;try{for(t in a)if(a.hasOwnProperty(t)&amp;&amp;a[t][e]==i){if(void 0!==n&amp;&amp;r.test(a[t][n]))return!0;if(void 0===n)return!0}return!1}catch(e){return!1}}var t,a,d,s,c,u,h,l="",m=l,b=l,f=[6,9],v="{{version}}",p="&lt;!--[if IE "+v+"]&gt;&lt;s&gt;&lt;/s&gt;&lt;![endif]--&gt;",w=n&amp;&amp;n.createElement("div"),g=[],x={webkit:void 0,edge:void 0,trident:void 0,gecko:void 0,presto:void 0,chrome:void 0,safari:void 0,firefox:void 0,ie:void 0,ieMode:void 0,opera:void 0,mobile:void 0,core:void 0,shell:void 0,phantomjs:void 0,os:void 0,ipad:void 0,iphone:void 0,ipod:void 0,ios:void 0,android:void 0,nodejs:void 0,extraName:void 0,extraVersion:void 0};if(w&amp;&amp;w.getElementsByTagName&amp;&amp;(w.innerHTML=p.replace(v,""),g=w.getElementsByTagName("s")),g.length&gt;0){for(_(e,x),s=f[0],c=f[1];s&lt;=c;s++)if(w.innerHTML=p.replace(v,s),g.length&gt;0){x[b="ie"]=s;break}!x.ie&amp;&amp;(d=N(e))&amp;&amp;(x[b="ie"]=d)}else((a=e.match(/AppleWebKit\/*\s*([\d.]*)/i))||(a=e.match(/Safari\/([\d.]*)/)))&amp;&amp;a[1]?(x[m="webkit"]=y(a[1]),(a=e.match(/OPR\/(\d+\.\d+)/))&amp;&amp;a[1]?x[b="opera"]=y(a[1]):(a=e.match(/Chrome\/([\d.]*)/))&amp;&amp;a[1]?x[b="chrome"]=y(a[1]):(a=e.match(/\/([\d.]*) Safari/))&amp;&amp;a[1]?x[b="safari"]=y(a[1]):x.safari=x.webkit,(a=e.match(/Edge\/([\d.]*)/))&amp;&amp;a[1]&amp;&amp;(m=b="edge",x[m]=y(a[1])),/ Mobile\//.test(e)&amp;&amp;e.match(/iPad|iPod|iPhone/)?(x.mobile="apple",a=e.match(/OS ([^\s]*)/),a&amp;&amp;a[1]&amp;&amp;(x.ios=y(a[1].replace("_","."))),t="ios"。。。。。。。。。。。

    获得的结果