6
6
7
7
import re
8
8
import rsa
9
- import ssl
10
9
import time
11
10
import json
12
11
import base64
13
12
import logging
14
13
import binascii
14
+ import requests
15
15
import urllib .parse
16
16
17
- # 参考PSpider项目
18
- import spider
19
- ssl ._create_default_https_context = ssl ._create_unverified_context
20
-
21
17
22
18
class WeiBoLogin (object ):
23
19
"""
@@ -28,51 +24,34 @@ def __init__(self):
28
24
"""
29
25
constructor
30
26
"""
31
- self .user_name = None # 登录用户名
32
- self .pass_word = None # 登录密码
33
- self .user_uniqueid = None # 用户唯一ID
34
- self .user_nick = None # 用户昵称
27
+ self .user_name = None
28
+ self .pass_word = None
29
+ self .user_uniqueid = None
30
+ self .user_nick = None
35
31
36
- self .cookie_jar , self .opener = None , None
32
+ self .session = requests .Session ()
33
+ self .session .headers .update ({"User-Agent" : "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0" })
34
+ self .session .get ("http://weibo.com/login.php" )
37
35
return
38
36
39
- def login (self , user_name , pass_word , proxies = None ):
37
+ def login (self , user_name , pass_word ):
40
38
"""
41
39
login weibo.com, return True or False
42
40
"""
43
- # 变量赋值初始化
44
41
self .user_name = user_name
45
42
self .pass_word = pass_word
46
43
self .user_uniqueid = None
47
44
self .user_nick = None
48
45
49
- # 构建cookie_jar和opener,这里不使用代理,同时保证整个流程中不需要关心cookie问题
50
- self .cookie_jar , self .opener = spider .make_cookiejar_opener (is_cookie = True , proxies = proxies )
51
- self .opener .addheaders = spider .make_headers (
52
- user_agent = "pc" ,
53
- host = "weibo.com" ,
54
- referer = "http://weibo.com/" ,
55
- accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8" ,
56
- accept_encoding = "gzip, deflate" ,
57
- accept_language = "zh-CN,zh;q=0.8"
58
- ).items ()
59
-
60
- # (1) 打开weibo.com/login.php,先请求一些必要的cookie信息
61
- self .opener .open ("http://weibo.com/login.php" )
62
-
63
- # (2) 根据用户名获取加密后的用户名
46
+ # get json data
64
47
s_user_name = self .get_username ()
65
-
66
- # (3) 利用加密后的用户名,获取其他一些数据:json格式
67
48
json_data = self .get_json_data (su_value = s_user_name )
68
49
if not json_data :
69
50
return False
70
-
71
- # (4) 根据第三步得到的json数据,获取加密后的密码
72
51
s_pass_word = self .get_password (json_data ["servertime" ], json_data ["nonce" ], json_data ["pubkey" ])
73
52
74
- # (5) 构造登录中用到的postdata
75
- post_dict = {
53
+ # make post_data
54
+ post_data = {
76
55
"entry" : "weibo" ,
77
56
"gateway" : "1" ,
78
57
"from" : "" ,
@@ -93,35 +72,28 @@ def login(self, user_name, pass_word, proxies=None):
93
72
"returntype" : "TEXT" ,
94
73
}
95
74
96
- # (6) 判断是否需要输入验证码,如果需要,获取验证码并进行打码操作
97
- if json_data . get ( "showpin" , None ) == 1 :
75
+ # get captcha code
76
+ if json_data [ "showpin" ] == 1 :
98
77
url = "http://login.sina.com.cn/cgi/pin.php?r=%d&s=0&p=%s" % (int (time .time ()), json_data ["pcid" ])
99
78
with open ("captcha.jpeg" , "wb" ) as file_out :
100
- file_out .write (self .opener . open (url ).read () )
79
+ file_out .write (self .session . get (url ).content )
101
80
code = input ("请输入验证码:" )
102
- # cid, code = self.yundama.get_captcha(self.opener.open(url).read(), "captcha.jpeg", "image/jpeg", codetype="1005")
103
- # if not code:
104
- # return False
105
- post_dict ["pcid" ] = json_data ["pcid" ]
106
- post_dict ["door" ] = code
81
+ post_data ["pcid" ] = json_data ["pcid" ]
82
+ post_data ["door" ] = code
107
83
108
- # (7) 根据构造的postdata,登录微博
84
+ # login weibo.com
109
85
login_url_1 = "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)&_=%d" % int (time .time ())
110
- json_data_1 = json . loads ( spider . get_html_content ( self .opener . open (login_url_1 , data = spider . make_post_data ( post_dict ))) )
86
+ json_data_1 = self .session . post (login_url_1 , data = post_data ). json ( )
111
87
if json_data_1 ["retcode" ] == "0" :
112
- # 登录后有一个跳转, 构造跳转链接的postdata
113
- post_dict = {
88
+ params = {
114
89
"callback" : "sinaSSOController.callbackLoginStatus" ,
90
+ "client" : "ssologin.js(v1.4.18)" ,
115
91
"ticket" : json_data_1 ["ticket" ],
116
92
"ssosavestate" : int (time .time ()),
117
- "client" : "ssologin.js(v1.4.18)" ,
118
93
"_" : int (time .time ()* 1000 ),
119
94
}
120
- login_url_2 = "https://passport.weibo.com/wbsso/login?" + urllib .parse .urlencode (post_dict )
121
- html_data = spider .get_html_content (self .opener .open (login_url_2 ), charset = "gbk" )
122
- json_data_2 = json .loads (re .search ("\((?P<result>.*)\)" , html_data ).group ("result" ))
123
-
124
- # 检查登录是否成功,并获取用户唯一ID,用户昵称等
95
+ response = self .session .get ("https://passport.weibo.com/wbsso/login" , params = params )
96
+ json_data_2 = json .loads (re .search (r"\((?P<result>.*)\)" , response .text ).group ("result" ))
125
97
if json_data_2 ["result" ] is True :
126
98
self .user_uniqueid = json_data_2 ["userinfo" ]["uniqueid" ]
127
99
self .user_nick = json_data_2 ["userinfo" ]["displayname" ]
@@ -134,7 +106,7 @@ def login(self, user_name, pass_word, proxies=None):
134
106
135
107
def get_username (self ):
136
108
"""
137
- get username, encrypt file: http://tjs.sjs.sinajs.cn/t5/register/js/page/remote/loginLayer.js
109
+ get legal username
138
110
"""
139
111
username_quote = urllib .parse .quote_plus (self .user_name )
140
112
username_base64 = base64 .b64encode (username_quote .encode ("utf-8" ))
@@ -144,20 +116,18 @@ def get_json_data(self, su_value):
144
116
"""
145
117
get the value of "servertime", "nonce", "pubkey", "rsakv" and "showpin", etc
146
118
"""
147
- post_data = urllib . parse . urlencode ( {
119
+ params = {
148
120
"entry" : "weibo" ,
149
121
"callback" : "sinaSSOController.preloginCallBack" ,
150
122
"rsakt" : "mod" ,
151
123
"checkpin" : "1" ,
152
124
"client" : "ssologin.js(v1.4.18)" ,
153
125
"su" : su_value ,
154
126
"_" : int (time .time ()* 1000 ),
155
- })
156
-
127
+ }
157
128
try :
158
- response = self .opener .open ('http://login.sina.com.cn/sso/prelogin.php?' + post_data )
159
- data = spider .get_html_content (response , charset = "utf-8" )
160
- json_data = json .loads (re .search ("\((?P<data>.*)\)" , data ).group ("data" ))
129
+ response = self .session .get ("http://login.sina.com.cn/sso/prelogin.php" , params = params )
130
+ json_data = json .loads (re .search (r"\((?P<data>.*)\)" , response .text ).group ("data" ))
161
131
except Exception as excep :
162
132
json_data = {}
163
133
logging .error ("WeiBoLogin get_json_data error: %s" , excep )
@@ -167,17 +137,16 @@ def get_json_data(self, su_value):
167
137
168
138
def get_password(self, servertime, nonce, pubkey):
    """
    Build the encrypted password string for the login request.

    The plaintext is the server time, a TAB character, the nonce, a newline
    and the raw password (read from self.pass_word), in that order. It is
    RSA-encrypted with the supplied public key and returned hex-encoded.

    :param servertime: "servertime" value taken from the prelogin JSON data
    :param nonce: "nonce" value taken from the prelogin JSON data
    :param pubkey: RSA modulus as a hexadecimal string ("pubkey" in that data)
    :return: hexadecimal text of the RSA ciphertext
    """
    # The separators must be a bare TAB and a bare newline: any padding
    # spaces around them change the plaintext and therefore the ciphertext.
    plain_text = "%s\t%s\n%s" % (servertime, nonce, self.pass_word)

    # The modulus arrives as hex text; the public exponent is the fixed
    # value 0x10001 (65537).
    public_key = rsa.PublicKey(int(pubkey, 16), 0x10001)

    cipher_bytes = rsa.encrypt(plain_text.encode("utf-8"), public_key)
    return binascii.b2a_hex(cipher_bytes).decode()
177
147
178
148
179
# Script entry point: the guard must compare against exactly "__main__"
# (no surrounding whitespace), otherwise the block below never runs.
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s\t %(levelname)s\t %(message)s")
    # test login: replace the placeholders with a real Weibo username and password
    weibo = WeiBoLogin()
    weibo.login("username", "password")
0 commit comments