# coding: utf-8
"""Scrape table cells from the butian.360.cn company list into ``360.txt``.

Each list page is downloaded, the contents of every
``<td align="left" style="padding-left:20px;">`` cell are extracted with a
regular expression, and the extracted strings are appended to ``360.txt``,
one per line.

The code runs on both Python 2 and Python 3.
"""
import io
import re
from contextlib import closing

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

BASE_URL = "https://butian.360.cn/company/lists/page/"
OUTPUT_FILE = '360.txt'
CELL_PATTERN = r'<td align="left" style="padding-left:20px;">(.*?)</td>'


def _to_text(value, encoding="utf-8"):
    """Return *value* as text, decoding byte strings with *encoding*.

    Undecodable bytes are replaced rather than raising, so one malformed
    page cannot abort a long crawl.
    """
    if isinstance(value, bytes):
        return value.decode(encoding, "replace")
    return value


def gethtml(url):
    """Download *url* and return the response body as text.

    The body is decoded as UTF-8 (assumed to be the site's encoding --
    TODO confirm against the response headers). The connection is always
    closed, even when reading fails. Network errors raised by ``urlopen``
    propagate to the caller.
    """
    # closing() is used because the Python 2 response object is not a
    # context manager.
    with closing(urlopen(url)) as page:
        raw = page.read()
    return _to_text(raw)


def getlink(html):
    """Return the contents of every matching table cell in *html*.

    *html* may be text or UTF-8 bytes. The result is a list of strings in
    document order, and is empty when nothing matches. ``.`` does not match
    newlines here, so a cell whose content spans several lines is skipped.
    """
    return re.findall(CELL_PATTERN, _to_text(html))


def save(links):
    """Append each item of *links* to ``360.txt``, one per line, as UTF-8.

    The file is created if missing and is closed before returning.
    """
    with io.open(OUTPUT_FILE, 'a', encoding='utf-8') as out:
        for link in links:
            out.write(_to_text(link) + u"\n")


def main(start=11, stop=200):
    """Crawl list pages ``start`` .. ``stop - 1`` and save what is found.

    For each page the page number followed by "ye" is printed (presumably
    pinyin for "page"; kept from the original output), then the list of
    extracted strings, which is also appended to the output file.
    """
    for page in range(start, stop):
        url = BASE_URL + str(page)
        html = gethtml(url)
        print(str(page) + "ye")
        links = getlink(html)
        print(links)
        save(links)


if __name__ == "__main__":
    main()
# 本站文章如无特殊说明,均为本站原创,如若转载,请注明出处:爬虫之一:爬补天厂商数据(爬虫) - Python技术站