# -*- coding: utf-8 -*-
"""
Created on Sat Dec 18 00:00:59 2021
@author: Hider
"""
import requests
import parsel
import time
import pandas as pd

# HTTP request headers passed to requests.get() for every page download.
headers = {
    # Browser-style identity string, split across lines for readability;
    # adjacent literals are concatenated into a single value.
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/95.0.4638.69 Safari/537.36'
    ),
    # Request that the connection is not kept alive after the response.
    'Connection': 'close',
}

def get_page(page):
    """Download one listing page and pass its parsed HTML to parse_page.

    Args:
        page: Page number; it is converted with ``str()`` and appended to
            the listing URL.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = 'https://www.kuaidaili.com/free/inha/' + str(page)
    # requests applies no timeout by default, so a stalled server would
    # block the whole run forever; bound the wait explicitly.
    response = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page, which would
    # silently contribute zero rows to the result.
    response.raise_for_status()
    html = parsel.Selector(response.text)
    parse_page(html)

def parse_page(html, results=None):
    """Extract the proxy table rows from a page and append them to a list.

    Each table row becomes one list of seven values, in this order:
    IP, PORT, 匿名度, 类型, 位置, 响应速度, 最后验证时间. A value is ``None``
    when the matching cell has no text node.

    Args:
        html: Parsed page exposing ``.xpath()`` (a ``parsel.Selector`` when
            called from get_page).
        results: List to append rows to. When omitted, the module-level
            ``parse_lists`` is used, which is how the script entry point
            collects rows.

    Returns:
        The list the rows were appended to.

    Raises:
        NameError: If ``results`` is omitted and no module-level
            ``parse_lists`` has been defined.
    """
    if results is None:
        # Backward-compatible default: accumulate into the global list
        # created by the script entry point.
        results = parse_lists

    # Values of the data-title attribute on each <td>, in output order.
    column_titles = ('IP', 'PORT', '匿名度', '类型', '位置', '响应速度', '最后验证时间')

    rows = html.xpath('//table[@class="table table-bordered table-striped"]/tbody/tr')
    for tr in rows:
        results.append([
            tr.xpath('./td[@data-title="%s"]//text()' % title).extract_first()
            for title in column_titles
        ])
        # Pause after every row, as before.
        # NOTE(review): throttling per parsed row rather than per HTTP
        # request looks unintentional -- confirm before changing.
        time.sleep(0.1)
    return results
    
if __name__ == '__main__':
    # Module-level accumulator: parse_page() appends one entry per table
    # row to this list while the pages are being fetched.
    parse_lists = []
    for page in range(1, 21):  # listing pages 1..20
        get_page(page)

    # Build the table inside the guard: parse_lists only exists when the
    # file is run as a script, so doing this at import time raised
    # NameError. df is still a module-level name when run as a script.
    df = pd.DataFrame(
        parse_lists,
        columns=['IP', 'PORT', '匿名度', '类型', '位置', '响应速度', '最后验证时间'],
    )