import os
import re
from urllib import request

import requests
from lxml import etree

# Browser-like User-Agent sent with every listing-page request.
_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3554.0 Safari/537.36"
}

# Seconds to wait for the listing page before giving up instead of hanging.
_REQUEST_TIMEOUT = 10

# Punctuation stripped from image titles (the original set, plus full-width
# variants) together with characters that are unsafe in file names, so a
# title can never introduce a path separator.
_UNSAFE_NAME_CHARS = re.compile(r'[\\/:*"<>|\??\.,,。!!]')


def _build_filename(img_name, img_url):
    """Return a file name built from the image title and the URL's extension."""
    # The extension may carry a "!suffix" after the real one; keep only the
    # part before the first "!".
    suffix = os.path.splitext(img_url)[1].split("!")[0]
    # The "alt" attribute may be missing entirely, hence the "or".
    stem = _UNSAFE_NAME_CHARS.sub("", img_name or "")
    if not stem:
        # No usable title: fall back to the URL's own base name.
        url_stem = os.path.splitext(os.path.basename(img_url))[0]
        stem = _UNSAFE_NAME_CHARS.sub("", url_stem) or "image"
    return stem + suffix


def get_detail(url, save_dir="imgs"):
    """Download every non-GIF image found on one listing page.

    Args:
        url: Address of the listing page to fetch.
        save_dir: Directory the images are written to; created if missing.

    Raises:
        requests.RequestException: If fetching the listing page fails or
            times out.
        OSError: If an image cannot be downloaded or written (this includes
            ``urllib.error.URLError``).
    """
    rep = requests.get(url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT)
    html = etree.HTML(rep.text)
    if html is None:
        # lxml yields None for an empty document; nothing to download.
        return

    os.makedirs(save_dir, exist_ok=True)

    imgs = html.xpath('//div[@class="page-content text-center"]//img[@class!="gif"]')
    for img in imgs:
        img_url = img.get("data-original")
        if not img_url:
            # Without a source address there is nothing to fetch.
            continue
        filename = _build_filename(img.get("alt"), img_url)
        # Download the image to the local directory.
        request.urlretrieve(img_url, os.path.join(save_dir, filename))


def main():
    """Walk listing pages 1 through 100 and download the images on each."""
    for i in range(1, 101):
        url = "http://www.doutula.com/photo/list/?page={}".format(i)
        get_detail(url)


if __name__ == '__main__':
    main()
View Code
本站文章如无特殊说明,均为本站原创,如若转载,请注明出处:Python爬虫之queue线程安全实战 - Python技术站