1
1
# coding: utf-8
2
2
from __future__ import unicode_literals
3
3
4
- import calendar
5
4
import re
6
- import time
7
5
8
6
from .amp import AMPIE
9
7
from .common import InfoExtractor
10
- from .youtube import YoutubeIE
11
- from ..compat import compat_urlparse
8
+ from ..utils import (
9
+ parse_duration ,
10
+ parse_iso8601 ,
11
+ try_get ,
12
+ )
12
13
13
14
14
15
class AbcNewsVideoIE (AMPIE ):
@@ -18,8 +19,8 @@ class AbcNewsVideoIE(AMPIE):
18
19
(?:
19
20
abcnews\.go\.com/
20
21
(?:
21
- [^/]+/video/(?P<display_id>[0-9a-z-]+)-|
22
- video/embed\?.*?\bid=
22
+ (?: [^/]+/)* video/(?P<display_id>[0-9a-z-]+)-|
23
+ video/(?: embed|itemfeed) \?.*?\bid=
23
24
)|
24
25
fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
25
26
)
@@ -36,6 +37,8 @@ class AbcNewsVideoIE(AMPIE):
36
37
'description' : 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.' ,
37
38
'duration' : 180 ,
38
39
'thumbnail' : r're:^https?://.*\.jpg$' ,
40
+ 'timestamp' : 1380454200 ,
41
+ 'upload_date' : '20130929' ,
39
42
},
40
43
'params' : {
41
44
# m3u8 download
@@ -47,6 +50,12 @@ class AbcNewsVideoIE(AMPIE):
47
50
}, {
48
51
'url' : 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478' ,
49
52
'only_matching' : True ,
53
+ }, {
54
+ 'url' : 'http://abcnews.go.com/video/itemfeed?id=46979033' ,
55
+ 'only_matching' : True ,
56
+ }, {
57
+ 'url' : 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761' ,
58
+ 'only_matching' : True ,
50
59
}]
51
60
52
61
def _real_extract (self , url ):
@@ -67,28 +76,23 @@ class AbcNewsIE(InfoExtractor):
67
76
_VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
68
77
69
78
_TESTS = [{
70
- 'url' : 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY' ,
79
+ # Youtube Embeds
80
+ 'url' : 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501' ,
71
81
'info_dict' : {
72
- 'id' : '10505354' ,
73
- 'ext' : 'flv' ,
74
- 'display_id' : 'dramatic-video-rare-death-job-america' ,
75
- 'title' : 'Occupational Hazards' ,
76
- 'description' : 'Nightline investigates the dangers that lurk at various jobs.' ,
77
- 'thumbnail' : r're:^https?://.*\.jpg$' ,
78
- 'upload_date' : '20100428' ,
79
- 'timestamp' : 1272412800 ,
82
+ 'id' : '51286501' ,
83
+ 'title' : "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player" ,
84
+ 'description' : 'Billingsley went from a child actor to Hollywood power player.' ,
80
85
},
81
- 'add_ie ' : [ 'AbcNewsVideo' ] ,
86
+ 'playlist_count ' : 5 ,
82
87
}, {
83
88
'url' : 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818' ,
84
89
'info_dict' : {
85
90
'id' : '38897857' ,
86
91
'ext' : 'mp4' ,
87
- 'display_id' : 'justin-timberlake-performs-stop-feeling-eurovision-2016' ,
88
92
'title' : 'Justin Timberlake Drops Hints For Secret Single' ,
89
93
'description' : 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.' ,
90
- 'upload_date' : '20160515 ' ,
91
- 'timestamp' : 1463329500 ,
94
+ 'upload_date' : '20160505 ' ,
95
+ 'timestamp' : 1462442280 ,
92
96
},
93
97
'params' : {
94
98
# m3u8 download
@@ -100,49 +104,55 @@ class AbcNewsIE(InfoExtractor):
100
104
}, {
101
105
'url' : 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343' ,
102
106
'only_matching' : True ,
107
+ }, {
108
+ # inline.type == 'video'
109
+ 'url' : 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343' ,
110
+ 'only_matching' : True ,
103
111
}]
104
112
105
113
def _real_extract(self, url):
    """Extract every video referenced by an ABC News story page.

    The page is downloaded and the JSON object assigned to
    ``window['__abcnews__']`` is parsed; the story record is taken from
    ``page.content.story.everscroll[0]``.  The result is a playlist whose
    entries are, in order:

    * the story's featured video, when it exposes a feed URL;
    * one entry per inline of type ``iframe`` that has a ``src``
      attribute (delegated to whichever extractor matches that URL);
    * one entry per inline of type ``video`` that has an id (delegated
      to AbcNewsVideoIE through the embed URL).

    :param url: story URL matched by this extractor's ``_VALID_URL``.
    :returns: the value of ``self.playlist_result(...)`` built from the
        story id, the article headline and the article sub-headline.
    :raises KeyError, IndexError: if the parsed JSON does not contain
        the ``page.content.story.everscroll[0]`` path.
    """
    story_id = self._match_id(url)
    webpage = self._download_webpage(url, story_id)

    page_data = self._parse_json(
        self._search_regex(
            r"window\['__abcnews__'\]\s*=\s*({.+?});",
            webpage, 'data'),
        story_id)
    story = page_data['page']['content']['story']['everscroll'][0]
    # ``or {}`` also covers an explicit JSON null, not just a missing key.
    article_contents = story.get('articleContents') or {}

    def entries():
        # Generator: entries are produced lazily when the playlist
        # result is consumed.
        featured_video = story.get('featuredVideo') or {}
        feed = try_get(featured_video, lambda x: x['video']['feed'])
        if feed:
            yield {
                '_type': 'url',
                'id': featured_video.get('id'),
                'title': featured_video.get('name'),
                'url': feed,
                'thumbnail': featured_video.get('images'),
                'description': featured_video.get('description'),
                'timestamp': parse_iso8601(featured_video.get('uploadDate')),
                'duration': parse_duration(featured_video.get('duration')),
                'ie_key': AbcNewsVideoIE.ie_key(),
            }

        for inline in (article_contents.get('inlines') or []):
            inline_type = inline.get('type')
            if inline_type == 'iframe':
                iframe_url = try_get(inline, lambda x: x['attrs']['src'])
                if iframe_url:
                    yield self.url_result(iframe_url)
            elif inline_type == 'video':
                video_id = inline.get('id')
                if video_id:
                    yield {
                        '_type': 'url',
                        'id': video_id,
                        # %-formatting instead of ``+`` so a numeric id
                        # in the JSON cannot raise TypeError; for string
                        # ids the resulting URL is unchanged.
                        'url': 'http://abcnews.go.com/video/embed?id=%s' % video_id,
                        'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'),
                        'description': inline.get('description'),
                        'duration': parse_duration(inline.get('duration')),
                        'ie_key': AbcNewsVideoIE.ie_key(),
                    }

    return self.playlist_result(
        entries(), story_id, article_contents.get('headline'),
        article_contents.get('subHead'))
0 commit comments