import urllib.request
import os
def url_open(url):
    """Fetch *url* and return the raw response body.

    The request is sent with a browser-like ``User-Agent`` header.

    Args:
        url: Address to fetch; passed to ``urllib.request.Request``.

    Returns:
        bytes: The complete, undecoded response body.

    Raises:
        urllib.error.URLError: If the request cannot be completed
            (``HTTPError``, a subclass, for HTTP error statuses).
    """
    req = urllib.request.Request(url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/65.0.3325.181 Safari/537.36',
    )
    # Use the response as a context manager so the underlying connection
    # is closed even if reading the body raises.
    with urllib.request.urlopen(req) as response:
        return response.read()
def save_file(file_url):
    """Download *file_url* and save it in the current working directory.

    The local file name is the text after the last ``/`` in the URL.

    Args:
        file_url: Address of the file to download.

    Raises:
        Exception: Whatever ``url_open`` raises when the download fails;
            in that case no local file is created or modified.
        OSError: If the local file cannot be written.
    """
    # get file name
    filename = file_url.split('/')[-1]
    # Download BEFORE opening the destination: opening with 'wb' first would
    # leave an empty file behind (or wipe an existing one) whenever the
    # request fails.
    data = url_open(file_url)
    # write file to local
    with open(filename, 'wb') as f:
        f.write(data)
def download_file(folder="files"):
    """Download the ``.hea`` header files numbered 100-234 into *folder*.

    For every number in ``range(100, 235)`` the file ``<number>.hea`` is
    requested from the base URL below and stored via ``save_file``.
    Downloads that fail are skipped. Afterwards every zero-byte file found
    under *folder* is removed.

    Args:
        folder: Directory to download into; created if it does not exist.

    Raises:
        OSError: If *folder* cannot be created or entered.
    """
    # Resolve once so the path stays valid regardless of the working directory.
    target = os.path.abspath(folder)
    # build the folder if it doesn't exist
    os.makedirs(target, exist_ok=True)

    # base url
    url = "https://physionet.org/physiobank/database/mitdb/"

    # save_file() writes into the current working directory, so switch to the
    # target folder for the duration of the downloads and switch back after,
    # otherwise a second call would nest "files/files".
    previous_dir = os.getcwd()
    os.chdir(target)
    try:
        for record in range(100, 235):
            # url of ECG signal head file '*.hea'
            file_url = url + str(record) + '.hea'
            try:
                save_file(file_url)
            except Exception:
                # Best effort: skip this file and carry on (presumably not
                # every number in the range exists on the server). Unlike a
                # bare ``except:``, Ctrl-C and SystemExit still propagate.
                continue
    finally:
        os.chdir(previous_dir)

    # discard the empty files left in the download folder
    for root, _dirs, files in os.walk(target):
        for name in files:
            path = os.path.join(root, name)
            if os.path.getsize(path) <= 0:
                os.remove(path)
# Start the download only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    download_file()
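# Source note: unless otherwise stated, articles on this site are original; if you repost, please credit the source: "Small crawler - downloading the ECG data of the MIT-BIH Arrhythmia Database from PhysioNet" - Python技术站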