Skip to content

Commit 3e313ff

Browse files
committed
更新了爬虫部分代码
1 parent 98dc244 commit 3e313ff

File tree

1 file changed

+7
-2
lines changed
  • Day66-75/code/image360/image360/spiders

1 file changed

+7
-2
lines changed

Day66-75/code/image360/image360/spiders/taobao.py

+7 -2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
# -*- coding: utf-8 -*-
2+
from io import StringIO
23
from urllib.parse import urlencode
4+
import re
35

46
import scrapy
57

@@ -26,6 +28,9 @@ def parse(self, response):
2628
item = GoodsItem()
2729
item['price'] = goods.xpath('div[5]/div[2]/div[1]/div[1]/strong/text()').extract_first()
2830
item['deal'] = goods.xpath('div[5]/div[2]/div[1]/div[2]/text()').extract_first()
29-
item['title'] = goods.xpath('div[6]/div[2]/div[2]/a/text()').extract_first()
31+
segments = goods.xpath('div[6]/div[2]/div[2]/a/text()').extract()
32+
title = StringIO()
33+
for segment in segments:
34+
title.write(re.sub('\s', '', segment))
35+
item['title'] = title.getvalue()
3036
yield item
31-

0 commit comments

Comments
 (0)