"""Code template: task-adding function, task-executing function,
process/thread switch function, and pool start function.

Each function is a placeholder to be filled in by a concrete crawler.
"""
# NOTE: alias fixed from the original non-PEP8 `processPoll` (typo: "Poll").
from multiprocessing import Pool as ProcessPool
from multiprocessing.dummy import Pool as ThreadPool


def get_page():
    """Execute one task (placeholder)."""
    pass


def url_list():
    """Build and return the task list (placeholder)."""
    pass


def get_pool():
    """Select a process pool or thread pool (placeholder)."""
    pass


def open_pool():
    """Start the pool and drive the tasks (placeholder)."""
    pass


if __name__ == '__main__':
    open_pool()
使用 16 线程爬取腾讯招聘的 100 页分页信息,用时 6 秒左右(3M 网速)
"""Crawl 100 pages of Tencent recruitment listings.

Structure: task-adding function (url_list), task-executing function
(get_page), process/thread switch function (get_pool), and a runner
(open_pool) that times the whole crawl on a 16-thread pool.
"""
from datetime import datetime
from multiprocessing import Pool as ProcessPool        # process pool
from multiprocessing.dummy import Pool as ThreadPool   # thread pool (same API)
from urllib import request
import ssl

# The target site's certificate fails verification; disable checking.
# NOTE(review): this weakens TLS for every HTTPS request in this process.
ssl._create_default_https_context = ssl._create_unverified_context

# Hoisted out of get_page: the header dict is identical for every request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/67.0.3396.99 Safari/537.36'
}


def get_page(task_q):
    """Execute one task: fetch a URL and print its final (post-redirect) address.

    Parameters
    ----------
    task_q : str
        The page URL to fetch.
    """
    req = request.Request(task_q, headers=HEADERS)
    # Use a context manager so the response socket is closed promptly;
    # the original leaked one open response per task (100 total).
    with request.urlopen(req) as response:
        print(response.url)


def url_list():
    """Build the task list: the 100 paginated listing URLs (start=0,10,...,990).

    Returns
    -------
    list[str]
        Full URLs, one per result page.
    """
    base_url = 'http://hr.tencent.com/position.php?start={}'
    return [base_url.format(i) for i in range(0, 10 * 100, 10)]


def get_pool(way=True, count=4):
    """Select a worker pool.

    Parameters
    ----------
    way : bool
        True -> process pool; False -> thread pool. Both expose the same
        map/close/join API, so callers are agnostic to the choice.
    count : int
        Number of worker processes/threads.
    """
    return ProcessPool(count) if way else ThreadPool(count)


def open_pool():
    """Run the crawl on a 16-thread pool and print the elapsed time."""
    start = datetime.now()
    # Threads, not processes: the work is I/O-bound (HTTP fetches).
    pool = get_pool(way=False, count=16)
    task_q = url_list()
    pool.map(get_page, task_q)
    pool.close()
    pool.join()
    end = datetime.now()
    print('程序结束,用时', end - start)


if __name__ == '__main__':
    open_pool()
本站文章如无特殊说明,均为本站原创,如若转载,请注明出处:python_爬虫_multiprocessing.dummy以及multiprocessing - Python技术站