-
什么是Selenium
selenium 是一套完整的web应用程序测试系统,包含了测试的录制(selenium IDE),编写及运行(Selenium Remote Control)和测试的并行处理(Selenium Grid)。
selenium的核心Selenium Core基于JsUnit,完全由JavaScript编写,因此可以用于任何支持JavaScript的浏览器上。
selenium可以模拟真实浏览器,自动化测试工具,支持多种浏览器,爬虫中主要用来解决JavaScript渲染问题。
-
基本使用
用python写爬虫的时候,主要用的是selenium的Webdriver,我们可以通过下面的方式先看看Selenium.Webdriver支持哪些浏览器
-
基本用法:
#打开google浏览器,再打开百度,输入Python然后按回车 from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait chromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver" browser = webdriver.Chrome(chromedriver) try: browser.get('https://www.baidu.com') input = browser.find_element_by_id('kw') # 找到id为kw的元素 input.send_keys('Python') # 敲入Python input.send_keys(Keys.ENTER) # 敲入回车 wait = WebDriverWait(browser, 10) wait.until(EC.presence_of_element_located((By.ID, 'content_left'))) # 等待某个元素加载出来 print(browser.current_url) print(browser.get_cookies()) print(browser.page_source) # 网页源代码 finally: browser.close()
https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=0&rsv_idx=1&tn=baidu&wd=Python&rsv_pq=aa95af1b0000f45e&rsv_t=53e2qpjWA%2FivTq6GgdT4FAhWM%2FWfkPZhHYkLoaS7MVPQUZCHF%2FkLQV2%2Brnc&rqlang=cn&rsv_enter=1&rsv_sug3=6&rsv_sug2=0&inputT=111&rsv_sug4=112 [{'domain': '.baidu.com', 'httpOnly': False, 'name': 'H_PS_PSSID', 'path': '/', 'secure': False, 'value': '1464_21121_26922_22159'}, {'domain': '.baidu.com', 'expiry': 3681286522.89887, 'httpOnly': False, 'name': 'BAIDUID', 'path': '/', 'secure': False, 'value': '410FC9CBBC1B798C0BEF149D5C0BD4E4:FG=1'}, {'domain': '.baidu.com', 'expiry': 3681286522.899111, 'httpOnly': False, 'name': 'BIDUPSID', 'path': '/', 'secure': False, 'value': '410FC9CBBC1B798C0BEF149D5C0BD4E4'}, {'domain': '.baidu.com', 'expiry': 3681286522.899241, 'httpOnly': False, 'name': 'PSTM', 'path': '/', 'secure': False, 'value': '1533802877'}, {'domain': '.baidu.com', 'httpOnly': False, 'name': 'PSINO', 'path': '/', 'secure': False, 'value': '7'}, {'domain': 'www.baidu.com', 'expiry': 2479882880.172246, 'httpOnly': False, 'name': 'delPer', 'path': '/', 'secure': False, 'value': '0'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'name': 'BD_HOME', 'path': '/', 'secure': False, 'value': '0'}, {'domain': 'www.baidu.com', 'expiry': 1534666877, 'httpOnly': False, 'name': 'BD_UPN', 'path': '/', 'secure': False, 'value': '12314753'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'name': 'BD_CK_SAM', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.baidu.com', 'expiry': 1533805472, 'httpOnly': False, 'name': 'H_PS_645EC', 'path': '/', 'secure': False, 'value': 'c9083HSTzGdEsBVBx%2FDjhaEep8Lu5MHd8KusVOaaun2nj5W%2Bjur8%2BSHut%2BM'}] <!DOCTYPE html><!--STATUS OK--><html xmlns="http://www.w3.org/1999/xhtml"><head><script charset="utf-8" async="" src="https://ss0.bdstatic.com/-0U0bnSm1A5BphGlnYG/tam-ogel/5d4e9b24-dcc5-483a-b6da-be1e9e621891.js"></script>
获得的结果
声明浏览器对象(上面我们知道了selenium支持很多的浏览器,但是如果想要声明并调用浏览器则需要:)
from selenium import webdriverbrowser = webdriver.Chrome()browser = webdriver.Firefox()browser = webdriver.Edge()browser = webdriver.PhantomJS()browser = webdriver.Safari()
访问页面
from selenium import webdriverchromedriver = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver"browser = webdriver.Chrome(chromedriver)browser.get('https://www.taobao.com')print(browser.page_source)browser.close()
<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml" lang="zh-CN" class="ks-webkit537 ks-webkit ks-chrome68 ks-chrome"><head><script>/*! 2018-08-07 14:58:00 v8.5.7 */ !function(e){function i(n){if(o[n])return o[n].exports;var r=o[n]={exports:{},id:n,loaded:!1};return e[n].call(r.exports,r,r.exports,i),r.loaded=!0,r.exports}var o={};return i.m=e,i.c=o,i.p="",i(0)}([function(e,i){"use strict";var o=window,n=document;!function(){var e=2,r="ali_analytics";if(o[r]&&o[r].ua&&e<=o[r].ua.version)return void(i.info=o[r].ua);var t,a,d,s,c,u,h,l,m,b,f,v,p,w,g,x,z,O=o.navigator,k=O.appVersion,T=O&&O.userAgent||"",y=function(e){var i=0;return parseFloat(e.replace(/\./g,function(){return 0===i++?".":""}))},_=function(e,i){var o,n;i[o="trident"]=.1,(n=e.match(/Trident\/([\d.]*)/))&&n[1]&&(i[o]=y(n[1])),i.core=o},N=function(e){var i,o;return(i=e.match(/MSIE ([^;]*)|Trident.*; rv(?:\s|:)?([0-9.]+)/))&&(o=i[1]||i[2])?y(o):0},P=function(e){return e||"other"},M=function(e){function i(){for(var i=[["Windows NT 5.1","winXP"],["Windows NT 6.1","win7"],["Windows NT 6.0","winVista"],["Windows NT 6.2","win8"],["Windows NT 10.0","win10"],["iPad","ios"],["iPhone;","ios"],["iPod","ios"],["Macintosh","mac"],["Android","android"],["Ubuntu","ubuntu"],["Linux","linux"],["Windows NT 5.2","win2003"],["Windows NT 5.0","win2000"],["Windows","winOther"],["rhino","rhino"]],o=0,n=i.length;o<n;++o)if(e.indexOf(i[o][0])!==-1)return i[o][1];return"other"}function r(e,i,n,r){var t,a=o.navigator.mimeTypes;try{for(t in a)if(a.hasOwnProperty(t)&&a[t][e]==i){if(void 0!==n&&r.test(a[t][n]))return!0;if(void 0===n)return!0}return!1}catch(e){return!1}}var t,a,d,s,c,u,h,l="",m=l,b=l,f=[6,9],v="{{version}}",p="<!--[if IE "+v+"]><s></s><![endif]-->",w=n&&n.createElement("div"),g=[],x={webkit:void 0,edge:void 0,trident:void 0,gecko:void 0,presto:void 0,chrome:void 0,safari:void 0,firefox:void 0,ie:void 0,ieMode:void 0,opera:void 0,mobile:void 0,core:void 0,shell:void 0,phantomjs:void 0,os:void 0,ipad:void 0,iphone:void 0,ipod:void 0,ios:void 0,android:void 0,nodejs:void 0,extraName:void 0,extraVersion:void 0};if(w&&w.getElementsByTagName&&(w.innerHTML=p.replace(v,""),g=w.getElementsByTagName("s")),g.length>0){for(_(e,x),s=f[0],c=f[1];s<=c;s++)if(w.innerHTML=p.replace(v,s),g.length>0){x[b="ie"]=s;break}!x.ie&&(d=N(e))&&(x[b="ie"]=d)}else((a=e.match(/AppleWebKit\/*\s*([\d.]*)/i))||(a=e.match(/Safari\/([\d.]*)/)))&&a[1]?(x[m="webkit"]=y(a[1]),(a=e.match(/OPR\/(\d+\.\d+)/))&&a[1]?x[b="opera"]=y(a[1]):(a=e.match(/Chrome\/([\d.]*)/))&&a[1]?x[b="chrome"]=y(a[1]):(a=e.match(/\/([\d.]*) Safari/))&&a[1]?x[b="safari"]=y(a[1]):x.safari=x.webkit,(a=e.match(/Edge\/([\d.]*)/))&&a[1]&&(m=b="edge",x[m]=y(a[1])),/ Mobile\//.test(e)&&e.match(/iPad|iPod|iPhone/)?(x.mobile="apple",a=e.match(/OS ([^\s]*)/),a&&a[1]&&(x.ios=y(a[1].replace("_","."))),t="ios"。。。。。。。。。。。
获得的结果
本站文章如无特殊说明,均为本站原创,如若转载,请注明出处:PYTHON 爬虫笔记七:Selenium库基础用法 - Python技术站