|
| 1 | +#!/usr/bin/env python |
| 2 | +# encoding: utf-8 |
| 3 | + |
| 4 | +""" |
| 5 | +@version: v1.0 |
| 6 | +@author: xag |
| 7 | +@license: Apache License |
| 8 | + |
| 9 | +@site: http://www.xingag.top |
| 10 | +@software: PyCharm |
| 11 | +@file: cnki_demo.py |
| 12 | +@time: 1/23/19 15:44 |
| 13 | +@description:[中国知网注册] |
| 14 | +""" |
| 15 | +from PIL import Image |
| 16 | +from selenium import webdriver |
| 17 | +from file_tools import * |
| 18 | +from AipOcr import * |
| 19 | +import requests |
| 20 | +import time |
| 21 | +import json |
| 22 | + |
| 23 | + |
class Cnki_Spider(object):
    """Register a CNKI account by driving Chrome through Selenium.

    The flow is: open the registration page, read the captcha by cropping it
    out of a full-page screenshot and sending it to an OCR client, type the
    recognised text into the captcha field until the page stops showing an
    error tip, then fill in and submit the registration form.
    """

    # Path of the chromedriver binary handed to Selenium.
    driver_path = "/usr/local/bin/chromedriver"

    # (left, upper, right, lower) pixel box of the captcha inside the page
    # screenshot.  These are the original hard-coded values; they are
    # presumably tied to the original author's window size / display scaling,
    # so override this attribute when running on a different setup.
    code_box = (899, 819, 1048, 883)

    def __init__(self):
        """Start a Chrome session and set up file names and form data."""
        self.driver = webdriver.Chrome(executable_path=Cnki_Spider.driver_path)

        # Screenshot of the whole page (which contains the captcha).
        self.screen_shot_file_name = "screen_shot.png"

        # Cropped image holding only the captcha.
        self.code_file_name = "image_code.png"

        # Registration page.
        self.main_url = 'http://my.cnki.net/elibregister/commonRegister.aspx'

        # Data to register with.
        # NOTE(review): credentials are hard-coded in source; consider loading
        # them from configuration instead.
        # Nickname.
        self.username = 'xingag2311'
        # Password.
        self.password = 'Hu9012782'
        # E-mail address.  register() needs it; it must be filled in before
        # run() is called, otherwise a ValueError is raised.
        self.email = ''  # TODO: set the e-mail address to register with

    def _require_email(self):
        """Raise ValueError when no e-mail address has been configured."""
        if not getattr(self, 'email', None):
            raise ValueError(
                'Cnki_Spider.email must be set before registering')

    @staticmethod
    def _first_word(result):
        """Return the first recognised text line of an OCR response.

        Returns an empty string when the response is not a dict, carries no
        ``words_result`` entries (e.g. an error payload), or the first entry
        has no ``words`` value.
        """
        if not isinstance(result, dict):
            return ''
        lines = result.get('words_result') or []
        if not lines:
            return ''
        return lines[0].get('words') or ''

    @staticmethod
    def _reload_captcha(code_img_element):
        """Click the captcha image to request a new one, then wait briefly."""
        code_img_element.click()
        # Wait after the click so the next screenshot is not taken while the
        # new image is presumably still loading.
        time.sleep(2)

    def run(self):
        """Solve the captcha on the registration page, then register.

        Raises:
            ValueError: if ``self.email`` has not been set.
        """
        # Fail before opening the page rather than after the captcha is solved.
        self._require_email()

        # 1. Open the registration page (it contains the captcha).
        self.driver.get(self.main_url)

        # 2. Captcha image and captcha input box.
        code_input_element = self.driver.find_element_by_id('txtOldCheckCode')
        code_img_element = self.driver.find_element_by_id('checkcode')

        # Outer container; clicking it moves focus away from the input box.
        container_element = self.driver.find_element_by_id('form1')

        # 3. Recognise the captcha, type it in, click outside.
        # If no error tip text shows up, the captcha is taken as accepted.
        while True:
            code = self.get_code().strip()

            if not code:
                # OCR recognised nothing: typing an empty string proves
                # nothing, so request a new captcha and try again.
                print('验证码识别失败,点击验证码图片')
                self._reload_captcha(code_img_element)
                continue

            error_tips_element = self.driver.find_element_by_id('span_oldcheckcode')

            print('验证码为:%s' % code)
            code_input_element.clear()
            code_input_element.click()
            code_input_element.send_keys(code)

            # Click the outer container so the page checks the captcha.
            # NOTE(review): the tip is read right after the click; if the page
            # validates asynchronously a short wait may be needed - confirm.
            container_element.click()

            # No error text means the captcha was accepted.
            if not error_tips_element.text:
                print('验证码验证成功')
                break

            print('验证码验证失败,点击验证码图片')
            self._reload_captcha(code_img_element)

        # 4. Register.
        self.register(code)

    def get_code(self):
        """Screenshot the page, crop the captcha and return the OCR text.

        Returns:
            The first text line recognised by the OCR client, or ``''`` when
            nothing was recognised.
        """
        screenshot_path = './%s' % self.screen_shot_file_name
        code_path = "./%s" % self.code_file_name

        # 1. Save a screenshot of the page to disk.
        self.driver.get_screenshot_as_file(screenshot_path)

        # 2-5. Crop the captcha region out of it and save that as its own file.
        screenshot_image = Image.open(screenshot_path)
        code_image = screenshot_image.crop(self.code_box)
        code_image.save(code_path)

        # 6. Read the cropped image back (as bytes, per the original comment;
        # get_file_content is not defined in this file).
        image = get_file_content(code_path)

        # 7. Recognise the captcha with the Baidu OCR client.
        result = client.basicAccurate(image)

        print(result)

        return self._first_word(result)

    def register(self, code):
        """Fill in the registration form and submit it.

        Args:
            code: the accepted captcha text.  It is not used here because
                run() has already typed it into the page.

        Raises:
            ValueError: if ``self.email`` has not been set.
        """
        self._require_email()

        # Look up all four elements first, in the original order.
        username_input_element = self.driver.find_element_by_id('username')
        password_input_element = self.driver.find_element_by_id('txtPassword')
        txtEmail_input_element = self.driver.find_element_by_id('txtEmail')
        submit_btn_element = self.driver.find_element_by_id('ButtonRegister')

        username_input_element.send_keys(self.username)
        password_input_element.send_keys(self.password)
        txtEmail_input_element.send_keys(self.email)

        submit_btn_element.click()
| 140 | + |
| 141 | + |
# Script entry point: build the spider and run the whole registration flow.
if __name__ == '__main__':
    Cnki_Spider().run()
0 commit comments