破解有道词典

1.进行普通爬取尝试:

 1 '''
 2 破解有道词典
 3 V1
 4 '''
 5 
 6 from urllib import request, parse
 7 
 8 
 9 def youdao(key):
10 
11     url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"
12 
13     data = {
14         "i": "boy",
15         "from":"AUTO",
16         "to": "AUTO",
17         "smartresult": "dict",
18         "client": "fanyideskweb",
19         "salt": "1523100789519",
20         "sign": "b8a55a436686cd89873fa46514ccedbe",
21         "doctype": "json",
22         "version": "2.1",
23         "keyfrom": "fanyi.web",
24         "action":"FY_BY_REALTIME",
25         "typoResult": "false"
26     }
27 
28     # 参数data需要是bytes格式
29     data = parse.urlencode(data).encode()
30 
31     headers = {
32                   "Accept": "application/json,text/javascript,*/*;q=0.01",
33                   #"Accept-Encoding": "gzip,deflate",
34                   "Accept-Language": "zh-CN,zh;q=0.9",
35                   "Connection": "keep-alive",
36                   "Content-Length": "200",
37                   "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
38                   "Cookie": "OUTFOX_SEARCH_USER_ID=-1548144101@10.168.8.76;JSESSIONID=aaaTLWzfvp5Hfg9mAhFkw;OUTFOX_SEARCH_USER_ID_NCOO=1999296830.4784973;___rl__test__cookies=1523100789517",
39                   "Host": "fanyi.youdao.com",
40                   "Origin": "http://fanyi.youdao.com",
41                   "Referer": "http://fanyi.youdao.com/",
42                   "User-Agent": "Mozilla/5.0( X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36 X-Requested-With: XMLHttpRequest"
43     }
44 
45     req = request.Request(url=url, data=data, headers=headers)
46 
47     rsp = request.urlopen(req)
48 
49     html = rsp.read().decode()
50     print(html)
51 
# Script entry point: run the V1 request for the sample word.
if __name__ == "__main__":
    youdao("boy")

2.破解有道词典JS代码中的MD5加密算法后再爬取数据(处理JS加密代码)

 1 '''
 2 V2
 3 处理js加密代码
 4 '''
 5 
 6 '''
 7 通过查找,能找到js代码中操作代码
 8 
 9 1. 这个是计算salt的公式 r = "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10));
10 2. sign: n.md5("fanyideskweb" + t + r + "ebSeFb%=XZ%T[KZ)c(sy!");
11 md5的输入由四个部分拼接而成:第一个和第四个都是固定的字符串,第三个是所谓的salt,
12 第二个(即上面公式中的 t)就是输入的要查找的单词
13 
14 '''
15 
16 
def getSalt():
    """Build the ``salt`` value the same way the site's JavaScript does.

    The JS formula is::

        "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10))

    i.e. the current time in milliseconds plus a random integer offset.

    :return: the salt as an int (callers convert it to str where needed)
    """
    import random
    import time

    # Math.random() is in [0, 1), so parseInt(10 * Math.random()) yields
    # 0..9. randrange(10) matches that; randint(0, 10) would also produce 10.
    offset = random.randrange(10)

    return int(time.time() * 1000) + offset
28 
def getMD5(v):
    """Return the hexadecimal MD5 digest of the string *v*.

    :param v: text to hash; it is encoded as UTF-8 first
    :return: the digest as a hex string
    """
    import hashlib

    # hashlib only accepts bytes, so encode the text before hashing.
    payload = v.encode("utf-8")
    return hashlib.md5(payload).hexdigest()
39 
40 
def getSign(key, salt):
    """Compute the ``sign`` form field for a translation request.

    The value is the MD5 hex digest of four concatenated pieces: the fixed
    client name, the word being looked up, the salt, and a fixed secret
    string.

    :param key: the text to translate
    :param salt: the salt for this request (converted with ``str``)
    :return: the digest as a hex string
    """
    pieces = ("fanyideskweb", key, str(salt), "ebSeFb%=XZ%T[KZ)c(sy!")
    return getMD5("".join(pieces))
47 
48 from urllib import request, parse
49 
50 
def youdao(key):
    """Send one translation request for *key* to the Youdao web endpoint (V2).

    The ``salt`` and ``sign`` fields are generated per request with
    ``getSalt`` and ``getSign`` instead of being copied from a browser.

    :param key: the text to translate
    :return: None; the form fields and the decoded response body are printed
    """
    url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"

    salt = getSalt()

    form = {
        "i": key,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": str(salt),
        # The sign must be computed from the same salt that is sent.
        "sign": getSign(key, salt),
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false",
    }

    print(form)

    # urlopen() requires the POST body as bytes.
    data = parse.urlencode(form).encode()

    headers = {
        "Accept": "application/json,text/javascript,*/*;q=0.01",
        # Accept-Encoding is deliberately not sent: urllib does not
        # decompress gzip/deflate bodies, so .decode() below would fail.
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        # Content-Length is left to urllib, which derives it from the body.
        "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
        "Cookie": "OUTFOX_SEARCH_USER_ID=-1548144101@10.168.8.76;JSESSIONID=aaaTLWzfvp5Hfg9mAhFkw;OUTFOX_SEARCH_USER_ID_NCOO=1999296830.4784973;___rl__test__cookies=1523100789517",
        "Host": "fanyi.youdao.com",
        "Origin": "http://fanyi.youdao.com",
        "Referer": "http://fanyi.youdao.com/",
        "User-Agent": "Mozilla/5.0( X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
        # This was previously fused into the User-Agent value by mistake.
        "X-Requested-With": "XMLHttpRequest",
    }

    req = request.Request(url=url, data=data, headers=headers)

    # The context manager closes the connection even if read() raises.
    with request.urlopen(req) as rsp:
        html = rsp.read().decode()
    print(html)
97 
# Script entry point: run the V2 request for the sample word.
if __name__ == "__main__":
    youdao("boy")

Python爬虫-破解有道词典(破解MD5的JS加密算法)

=========================

Python爬虫-破解有道词典(破解MD5的JS加密算法)

==================================

Python爬虫-破解有道词典(破解MD5的JS加密算法)

==================================

Python爬虫-破解有道词典(破解MD5的JS加密算法)

======================================

Python爬虫-破解有道词典(破解MD5的JS加密算法)

 ==========================================

结果示例:

Python爬虫-破解有道词典(破解MD5的JS加密算法)

JS代码格式化工具:

http://tool.oschina.net/codeformat/js