Skip to content

Commit 0e064fc

Browse files
committed
中国知网注册
1 parent 5ca0af1 commit 0e064fc

File tree

8 files changed

+191
-0
lines changed

8 files changed

+191
-0
lines changed

.DS_Store

0 Bytes
Binary file not shown.

verification code/.DS_Store

6 KB
Binary file not shown.
6 KB
Binary file not shown.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/usr/bin/env python
2+
# encoding: utf-8
3+
4+
"""
5+
@version: v1.0
6+
@author: xag
7+
@license: Apache Licence
8+
9+
@site: http://www.xingag.top
10+
@software: PyCharm
11+
@file: AipOcr.py
12+
@time: 1/23/19 15:19
13+
@description:AipOcr是OCR的Python SDK客户端,为使用OCR的开发人员提供了一系列的交互方法。
14+
"""
15+
16+
import os

from aip import AipOcr

# Baidu OCR credentials: APP ID, API key (AK) and secret key (SK).
# Each value can be overridden through an environment variable so that real
# secrets do not have to be committed to the repository. When a variable is
# not set, the original literal (which appears to be a masked placeholder) is
# used, so existing behaviour is unchanged.
APP_ID = os.environ.get('BAIDU_OCR_APP_ID', '15474**')
API_KEY = os.environ.get('BAIDU_OCR_API_KEY', 'VBoMZ6XUX119w***')
SECRET_KEY = os.environ.get('BAIDU_OCR_SECRET_KEY', 'GPvqLVeGIMOR57***')

# Module-level OCR client, built once at import time from the values above.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
#!/usr/bin/env python
2+
# encoding: utf-8
3+
4+
"""
5+
@version: v1.0
6+
@author: xag
7+
@license: Apache Licence
8+
9+
@site: http://www.xingag.top
10+
@software: PyCharm
11+
@file: cnki_demo.py
12+
@time: 1/23/19 15:44
13+
@description:[中国知网注册]
14+
"""
15+
from PIL import Image
16+
from selenium import webdriver
17+
from file_tools import *
18+
from AipOcr import *
19+
import requests
20+
import time
21+
import json
22+
23+
24+
class Cnki_Spider(object):
    """Fill in the CNKI registration form, solving its captcha via OCR.

    The flow is: open the registration page, screenshot it, crop the captcha
    out of the screenshot, send the crop to the OCR ``client``, type the
    recognised text into the captcha field, and retry with a fresh captcha
    until the page shows no error hint. Then the remaining fields are filled
    in and the form is submitted.
    """

    # Location of the chromedriver binary handed to selenium.
    driver_path = "/usr/local/bin/chromedriver"

    # Pixel box (left, upper, right, lower) of the captcha inside the
    # full-page screenshot.
    # NOTE(review): these coordinates presumably depend on the window size and
    # display scaling of the machine they were measured on; override this
    # attribute when running elsewhere.
    code_box = (899, 819, 1048, 883)

    def __init__(self):
        """Start a Chrome session and set the file names and form values."""
        self.driver = webdriver.Chrome(executable_path=Cnki_Spider.driver_path)

        # Screenshot of the whole page that contains the captcha.
        self.screen_shot_file_name = "screen_shot.png"

        # Cropped image that contains only the captcha.
        self.code_file_name = "image_code.png"

        # Registration page.
        self.main_url = 'http://my.cnki.net/elibregister/commonRegister.aspx'

        # Values to register with.
        # Nickname.
        self.username = 'xingag2311'
        # Password.
        self.password = 'Hu9012782'
        # E-mail address.
        # NOTE(review): this literal looks like a redacted placeholder rather
        # than a usable address; confirm before running.
        self.email = '[email protected]'

    def run(self, max_attempts=None):
        """Solve the captcha, then submit the registration form.

        :param max_attempts: maximum number of captcha attempts before giving
            up; ``None`` (the default) retries without limit.
        :raises RuntimeError: if ``max_attempts`` is set and every attempt
            failed.
        """
        # 1. Open the registration page (it contains the captcha).
        self.driver.get(self.main_url)

        # 2. Captcha input box and captcha image.
        code_input_element = self.driver.find_element_by_id('txtOldCheckCode')
        code_img_element = self.driver.find_element_by_id('checkcode')

        # Outer container; clicking it moves focus away from the input box.
        container_element = self.driver.find_element_by_id('form1')

        # 3. Recognise the captcha, type it in, click outside.
        # If no error hint shows up the captcha is treated as accepted.
        attempts = 0
        while True:
            if max_attempts is not None and attempts >= max_attempts:
                raise RuntimeError(
                    'captcha not accepted after %d attempts' % attempts)
            attempts += 1

            code = self.get_code().strip()

            # OCR returned nothing usable: load a new captcha and try again
            # instead of submitting an empty value.
            if not code:
                time.sleep(2)
                print('验证码识别失败,点击验证码图片')
                code_img_element.click()
                continue

            error_tips_element = self.driver.find_element_by_id('span_oldcheckcode')

            print('验证码为:%s' % code)
            code_input_element.clear()
            code_input_element.click()
            code_input_element.send_keys(code)

            # Click the outer container so the page checks the captcha.
            container_element.click()

            # No error text means the captcha was accepted.
            if not error_tips_element.text:
                print('验证码验证成功')
                break

            # The page reported a wrong captcha.
            time.sleep(2)
            print('验证码验证失败,点击验证码图片')

            # Click the captcha image to load a new one.
            code_img_element.click()

        # 4. Register.
        self.register(code)

    @staticmethod
    def _extract_words(result):
        """Return the first recognised text of an OCR response, or ''.

        An empty string is returned when ``result`` is not a dict, has no
        ``words_result`` entries (for example an error payload), or when the
        first entry carries no ``words`` value.
        """
        if not isinstance(result, dict):
            return ''
        words_result = result.get('words_result')
        if not words_result:
            return ''
        return words_result[0].get('words') or ''

    def get_code(self):
        """Screenshot the page, crop the captcha and return its OCR text.

        :return: the recognised text, or ``''`` when the OCR response holds
            no text.
        """
        screenshot_path = './%s' % self.screen_shot_file_name
        code_path = './%s' % self.code_file_name

        # 1. Save a screenshot of the page to disk.
        self.driver.get_screenshot_as_file(screenshot_path)

        # 2-5. Open the screenshot, crop the captcha region and save the crop.
        # The crop is saved inside the ``with`` block so the source image is
        # still open while its pixels are needed, and is closed afterwards.
        with Image.open(screenshot_path) as screenshot_image:
            code_image = screenshot_image.crop(self.code_box)
            code_image.save(code_path)

        # 6. Read the cropped image back as bytes.
        image = get_file_content(code_path)

        # 7. Recognise the captcha with Baidu OCR.
        result = client.basicAccurate(image)

        print(result)

        return self._extract_words(result)

    def register(self, code):
        """Fill in user name, password and e-mail, then submit the form.

        :param code: the accepted captcha text; not used here because it has
            already been typed into the page by ``run``.
        """
        # User name input box.
        username_input_element = self.driver.find_element_by_id('username')

        # Password input box.
        password_input_element = self.driver.find_element_by_id('txtPassword')

        # E-mail input box.
        txtEmail_input_element = self.driver.find_element_by_id('txtEmail')

        # Register button.
        submit_btn_element = self.driver.find_element_by_id('ButtonRegister')

        username_input_element.send_keys(self.username)
        password_input_element.send_keys(self.password)
        txtEmail_input_element.send_keys(self.email)

        submit_btn_element.click()
140+
141+
142+
if __name__ == '__main__':
    # Script entry point: build the spider and start the registration flow.
    Cnki_Spider().run()
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/usr/bin/env python
2+
# encoding: utf-8
3+
4+
"""
5+
@version: v1.0
6+
@author: xag
7+
@license: Apache Licence
8+
9+
@site: http://www.xingag.top
10+
@software: PyCharm
11+
@file: file_tools.py
12+
@time: 1/23/19 15:41
13+
@description:TODO
14+
"""
15+
16+
17+
def get_file_content(filePath):
    """
    Read a file in binary mode.

    :param filePath: path of the file to read
    :return: the complete file content as ``bytes``
    """
    with open(filePath, mode='rb') as stream:
        payload = stream.read()
    return payload
17.8 KB
Loading
324 KB
Loading

0 commit comments

Comments
 (0)