@@ -1,5 +1,5 @@
 import celery
-from datetime import datetime
+from datetime import datetime, timezone
 from functools import partial
 
 from lightflow.logger import get_logger
@@ -26,7 +26,7 @@ def execute_workflow(self, workflow, workflow_id=None):
         workflow_id (string): If a workflow ID is provided the workflow run will use
             this ID, if not a new ID will be auto generated.
     """
-    start_time = datetime.now()
+    start_time = datetime.utcnow()
 
     logger.info('Running workflow <{}>'.format(workflow.name))
     data_store = DataStore(**self.app.user_options['config'].data_store,
@@ -39,7 +39,7 @@ def execute_workflow(self, workflow, workflow_id=None):
         workflow_id = data_store.add(payload={
             'name': workflow.name,
             'queue': workflow.queue,
-            'start_time': datetime.utcnow()
+            'start_time': start_time
         })
         logger.info('Created workflow ID: {}'.format(workflow_id))
 
@@ -48,7 +48,7 @@ def execute_workflow(self, workflow, workflow_id=None):
                     job_type=JobType.Workflow,
                     name=workflow.name,
                     queue=workflow.queue,
-                    time=datetime.utcnow(),
+                    time=start_time,
                     workflow_id=workflow_id,
                     duration=None)
 
@@ -62,7 +62,7 @@ def execute_workflow(self, workflow, workflow_id=None):
                             'type': JobType.Workflow,
                             'workflow_id': workflow_id,
                             'queue': workflow.queue,
-                            'start_time': datetime.utcnow(),
+                            'start_time': start_time,
                             'arguments': workflow.provided_arguments})
 
     # run the DAGs in the workflow
@@ -71,6 +71,16 @@ def execute_workflow(self, workflow, workflow_id=None):
                  signal_server=signal_server,
                  workflow_id=workflow_id)
 
+    end_time = datetime.utcnow()
+    duration = (end_time - start_time).total_seconds()
+
+    # update data store with provenance information
+    store_doc = data_store.get(workflow_id)
+    store_doc.set(key='end_time', value=end_time,
+                  section=DataStoreDocumentSection.Meta)
+    store_doc.set(key='duration', value=duration,
+                  section=DataStoreDocumentSection.Meta)
+
     # send custom celery event that the workflow has succeeded
     event_name = JobEventName.Succeeded if not workflow.is_stopped \
         else JobEventName.Aborted
@@ -79,9 +89,9 @@ def execute_workflow(self, workflow, workflow_id=None):
                     job_type=JobType.Workflow,
                     name=workflow.name,
                     queue=workflow.queue,
-                    time=datetime.utcnow(),
+                    time=end_time,
                     workflow_id=workflow_id,
-                    duration=(datetime.now() - start_time).total_seconds())
+                    duration=duration)
 
     logger.info('Finished workflow <{}>'.format(workflow.name))
 
@@ -101,15 +111,23 @@ def execute_dag(self, dag, workflow_id, data=None):
             the first tasks in the dag. This allows the transfer of
             data from dag to dag.
     """
-    start_time = datetime.now()
+    start_time = datetime.utcnow()
     logger.info('Running DAG <{}>'.format(dag.name))
 
+    store_doc = DataStore(**self.app.user_options['config'].data_store,
+                          auto_connect=True).get(workflow_id)
+    store_loc = 'log.{}'.format(dag.name)
+
+    # update data store with provenance information
+    store_doc.set(key='{}.start_time'.format(store_loc), value=start_time,
+                  section=DataStoreDocumentSection.Meta)
+
     # send custom celery event that the dag has been started
     self.send_event(JobEventName.Started,
                     job_type=JobType.Dag,
                     name=dag.name,
                     queue=dag.queue,
-                    time=datetime.utcnow(),
+                    time=start_time,
                     workflow_id=workflow_id,
                     duration=None)
 
@@ -128,15 +146,24 @@ def execute_dag(self, dag, workflow_id, data=None):
             signal=signal,
             data=data)
 
+    end_time = datetime.utcnow()
+    duration = (end_time - start_time).total_seconds()
+
+    # update data store with provenance information
+    store_doc.set(key='{}.end_time'.format(store_loc), value=end_time,
+                  section=DataStoreDocumentSection.Meta)
+    store_doc.set(key='{}.duration'.format(store_loc), value=duration,
+                  section=DataStoreDocumentSection.Meta)
+
     # send custom celery event that the dag has succeeded
     event_name = JobEventName.Succeeded if not signal.is_stopped else JobEventName.Aborted
     self.send_event(event_name,
                     job_type=JobType.Dag,
                     name=dag.name,
                     queue=dag.queue,
-                    time=datetime.utcnow(),
+                    time=end_time,
                     workflow_id=workflow_id,
-                    duration=(datetime.now() - start_time).total_seconds())
+                    duration=duration)
 
     logger.info('Finished DAG <{}>'.format(dag.name))
 
@@ -153,10 +180,11 @@ def execute_task(self, task, workflow_id, data=None):
         data (MultiTaskData): An optional MultiTaskData object that contains the data
             that has been passed down from upstream tasks.
     """
-    start_time = datetime.now()
+    start_time = datetime.utcnow()
 
     store_doc = DataStore(**self.app.user_options['config'].data_store,
                           auto_connect=True).get(workflow_id)
+    store_loc = 'log.{}.tasks.{}'.format(task.dag_name, task.name)
 
     def handle_callback(message, event_type, exc=None):
         msg = '{}: {}'.format(message, str(exc)) if exc is not None else message
@@ -169,26 +197,40 @@ def handle_callback(message, event_type, exc=None):
         else:
             logger.info(msg)
 
-        # store a log into the persistent data store
+        current_time = datetime.utcnow()
+
+        # store provenance information about a task
         if event_type != JobEventName.Started:
-            duration = (datetime.now() - start_time).total_seconds()
+            duration = (current_time - start_time).total_seconds()
+
+            store_doc.set(key='{}.end_time'.format(store_loc),
+                          value=current_time,
+                          section=DataStoreDocumentSection.Meta)
 
-            store_doc.set(key='log.{}.{}.duration'.format(task.dag_name, task.name),
+            store_doc.set(key='{}.duration'.format(store_loc),
                           value=duration,
                           section=DataStoreDocumentSection.Meta)
+        else:
+            # store provenance information about a task
+            store_doc.set(key='{}.start_time'.format(store_loc),
+                          value=start_time,
+                          section=DataStoreDocumentSection.Meta)
 
-            store_doc.set(key='log.{}.{}.worker'.format(task.dag_name, task.name),
+            store_doc.set(key='{}.worker'.format(store_loc),
                           value=self.request.hostname,
                           section=DataStoreDocumentSection.Meta)
-        else:
+
+            store_doc.set(key='{}.queue'.format(store_loc),
+                          value=task.queue,
+                          section=DataStoreDocumentSection.Meta)
             duration = None
 
         # send custom celery event
         self.send_event(event_type,
                         job_type=JobType.Task,
                         name=task.name,
                         queue=task.queue,
-                        time=datetime.utcnow(),
+                        time=current_time,
                         workflow_id=workflow_id,
                         duration=duration)
 
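Note: this commit moves every timestamp from local time (datetime.now()) to naive UTC (datetime.utcnow()) and records a single start_time/end_time pair per run, so the provenance written to the data store and the values sent in the celery events agree instead of re-reading the clock. The import change also brings in timezone, which would allow timezone-aware timestamps instead of naive ones. A minimal sketch of that alternative, not part of the commit and assuming nothing beyond the standard library:

    from datetime import datetime, timezone

    # naive UTC timestamp, as used throughout this commit
    start_time = datetime.utcnow()

    # timezone-aware equivalent; subtraction and .total_seconds() behave the
    # same, but the UTC offset travels with the value when it is stored or
    # serialized, avoiding ambiguity for downstream consumers
    start_time = datetime.now(timezone.utc)
    end_time = datetime.now(timezone.utc)
    duration = (end_time - start_time).total_seconds()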