99from .Fetcher import Fetcher
1010import sqlite3
1111import datetime
12- import time
12+ import threading
1313
# Module-level SQLite connection used by the database functions in this module.
# The detect_types flags enable sqlite3's declared-type and column-name based
# type detection for values read back from the database.
conn = sqlite3.connect(DATABASE_PATH, detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
# Thread lock
conn_lock = threading.Lock()
# Process lock; stays None until set_proc_lock() installs one
proc_lock = None
19+
def set_proc_lock(proc_lock_sub):
    """
    Install the process lock used by this module.
    proc_lock_sub : the process lock created in main
    """
    # Rebind the module-level name `proc_lock` to the lock handed in by the caller.
    globals()["proc_lock"] = proc_lock_sub
1527
1628def pushNewFetch (fetcher_name , protocol , ip , port ):
1729 """
@@ -21,13 +33,13 @@ def pushNewFetch(fetcher_name, protocol, ip, port):
2133 ip : 代理IP地址
2234 port : 代理端口
2335 """
24- time .sleep (0.1 ) # 为了解决并发读写饿死的问题
25-
2636 p = Proxy ()
2737 p .fetcher_name = fetcher_name
2838 p .protocol = protocol
2939 p .ip = ip
3040 p .port = port
41+ conn_lock .acquire ()
42+ proc_lock .acquire ()
3143
3244 c = conn .cursor ()
3345 c .execute ('BEGIN EXCLUSIVE TRANSACTION;' )
@@ -43,6 +55,8 @@ def pushNewFetch(fetcher_name, protocol, ip, port):
4355 c .execute ('INSERT INTO proxies VALUES (?,?,?,?,?,?,?,?,?)' , p .params ())
4456 c .close ()
4557 conn .commit ()
58+ conn_lock .release ()
59+ proc_lock .release ()
4660
4761def getToValidate (max_count = 1 ):
4862 """
@@ -51,6 +65,8 @@ def getToValidate(max_count=1):
5165 max_count : 返回数量限制
5266 返回 : list[Proxy]
5367 """
68+ conn_lock .acquire ()
69+ proc_lock .acquire ()
5470 c = conn .cursor ()
5571 c .execute ('BEGIN EXCLUSIVE TRANSACTION;' )
5672 c .execute ('SELECT * FROM proxies WHERE to_validate_date<=? AND validated=? ORDER BY to_validate_date LIMIT ?' , (
@@ -67,6 +83,8 @@ def getToValidate(max_count=1):
6783 proxies = proxies + [Proxy .decode (row ) for row in c ]
6884 c .close ()
6985 conn .commit ()
86+ conn_lock .release ()
87+ proc_lock .release ()
7088 return proxies
7189
7290def pushValidateResult (proxy , success , latency ):
@@ -76,10 +94,10 @@ def pushValidateResult(proxy, success, latency):
7694 success : True/False,验证是否成功
7795 latency : 本次验证所用的时间(单位毫秒)
7896 """
79- time .sleep (0.01 ) # 为了解决并发读写饿死的问题
80-
8197 p = proxy
8298 should_remove = p .validate (success , latency )
99+ conn_lock .acquire ()
100+ proc_lock .acquire ()
83101 if should_remove :
84102 conn .execute ('DELETE FROM proxies WHERE protocol=? AND ip=? AND port=?' , (p .protocol , p .ip , p .port ))
85103 else :
@@ -92,19 +110,25 @@ def pushValidateResult(proxy, success, latency):
92110 p .protocol , p .ip , p .port
93111 ))
94112 conn .commit ()
113+ conn_lock .release ()
114+ proc_lock .release ()
95115
def getValidatedRandom(max_count):
    """
    Return randomly selected proxies that have passed validation.
    max_count : maximum number of proxies to return; max_count<=0 means no limit
    returns : list[Proxy]
    """
    # `with` guarantees both locks are released even when the query or the
    # row decoding raises; a leaked lock would block every later database call.
    with conn_lock, proc_lock:
        if max_count > 0:
            r = conn.execute(
                'SELECT * FROM proxies WHERE validated=? ORDER BY RANDOM() LIMIT ?',
                (True, max_count),
            )
        else:
            r = conn.execute('SELECT * FROM proxies WHERE validated=? ORDER BY RANDOM()', (True,))
        try:
            return [Proxy.decode(row) for row in r]
        finally:
            # Close the cursor on both the success and the failure path.
            r.close()
109133
110134def pushFetcherResult (name , proxies_cnt ):
@@ -113,8 +137,8 @@ def pushFetcherResult(name, proxies_cnt):
113137 name : 爬取器的名称
114138 proxies_cnt : 本次爬取到的代理数量
115139 """
116- time . sleep ( 0.1 ) # 为了解决并发读写饿死的问题
117-
140+ conn_lock . acquire ()
141+ proc_lock . acquire ()
118142 c = conn .cursor ()
119143 c .execute ('BEGIN EXCLUSIVE TRANSACTION;' )
120144 c .execute ('SELECT * FROM fetchers WHERE name=?' , (name ,))
@@ -131,13 +155,17 @@ def pushFetcherResult(name, proxies_cnt):
131155 ))
132156 c .close ()
133157 conn .commit ()
158+ conn_lock .release ()
159+ proc_lock .release ()
134160
135161def pushFetcherEnable (name , enable ):
136162 """
137163 设置是否起用对应爬取器,被禁用的爬取器将不会被运行
138164 name : 爬取器的名称
139165 enable : True/False, 是否启用
140166 """
167+ conn_lock .acquire ()
168+ proc_lock .acquire ()
141169 c = conn .cursor ()
142170 c .execute ('BEGIN EXCLUSIVE TRANSACTION;' )
143171 c .execute ('SELECT * FROM fetchers WHERE name=?' , (name ,))
@@ -152,25 +180,35 @@ def pushFetcherEnable(name, enable):
152180 ))
153181 c .close ()
154182 conn .commit ()
183+ conn_lock .release ()
184+ proc_lock .release ()
155185
def getAllFetchers():
    """
    Return every fetcher together with its stored status.
    returns : list[Fetcher]
    """
    # `with` guarantees both locks are released even when the query or the
    # row decoding raises; a leaked lock would block every later database call.
    with conn_lock, proc_lock:
        r = conn.execute('SELECT * FROM fetchers')
        try:
            return [Fetcher.decode(row) for row in r]
        finally:
            # Close the cursor on both the success and the failure path.
            r.close()
165199
166200def getFetcher (name ):
167201 """
168202 获取指定爬取器以及状态
169203 返回 : Fetcher
170204 """
205+ conn_lock .acquire ()
206+ proc_lock .acquire ()
171207 r = conn .execute ('SELECT * FROM fetchers WHERE name=?' , (name ,))
172208 row = r .fetchone ()
173209 r .close ()
210+ conn_lock .release ()
211+ proc_lock .release ()
174212 if row is None :
175213 return None
176214 else :
@@ -182,16 +220,22 @@ def getProxyCount(fetcher_name):
182220 fetcher_name : 爬取器名称
183221 返回 : int
184222 """
223+ conn_lock .acquire ()
224+ proc_lock .acquire ()
185225 r = conn .execute ('SELECT count(*) FROM proxies WHERE fetcher_name=?' , (fetcher_name ,))
186226 cnt = r .fetchone ()[0 ]
187227 r .close ()
228+ conn_lock .release ()
229+ proc_lock .release ()
188230 return cnt
189231
190232def getProxiesStatus ():
191233 """
192234 获取代理状态,包括`全部代理数量`,`当前可用代理数量`,`等待验证代理数量`
193235 返回 : dict
194236 """
237+ conn_lock .acquire ()
238+ proc_lock .acquire ()
195239 r = conn .execute ('SELECT count(*) FROM proxies' )
196240 sum_proxies_cnt = r .fetchone ()[0 ]
197241 r .close ()
@@ -203,7 +247,8 @@ def getProxiesStatus():
203247 r = conn .execute ('SELECT count(*) FROM proxies WHERE to_validate_date<=?' , (datetime .datetime .now (),))
204248 pending_proxies_cnt = r .fetchone ()[0 ]
205249 r .close ()
206-
250+ conn_lock .release ()
251+ proc_lock .release ()
207252 return dict (
208253 sum_proxies_cnt = sum_proxies_cnt ,
209254 validated_proxies_cnt = validated_proxies_cnt ,
def pushClearFetchersStatus():
    """
    Reset the statistics of all fetchers: sum_proxies_cnt, last_proxies_cnt
    and last_fetch_date.
    """
    # `with` guarantees both locks are released even when a statement raises;
    # a leaked lock would block every later database call.
    with conn_lock, proc_lock:
        c = conn.cursor()
        try:
            c.execute('BEGIN EXCLUSIVE TRANSACTION;')
            c.execute('UPDATE fetchers SET sum_proxies_cnt=?, last_proxies_cnt=?, last_fetch_date=?', (0, 0, None))
            conn.commit()
        except Exception:
            # Do not leave the exclusive transaction open on the shared
            # connection, otherwise the next BEGIN on it would fail.
            conn.rollback()
            raise
        finally:
            c.close()
0 commit comments