Skip to content

Commit 04cca1e

Browse files
CDNC-3181 Cleanup the unused watchdog code (cadence-workflow#5096)
* Removed the Watchdog code and its service calls * Removed watchdog occurrences and dependencies --------- Co-authored-by: David Porter <[email protected]>
1 parent 29a0c97 commit 04cca1e

File tree

8 files changed

+1
-433
lines changed

8 files changed

+1
-433
lines changed

common/constants.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,6 @@ const (
185185
const (
186186
// DefaultESAnalyzerPause controls if we want to dynamically pause the analyzer
187187
DefaultESAnalyzerPause = false
188-
// DefaultCorruptWorkflowWatchdogPause controls if we want to dynamically pause the watchdog
189-
DefaultCorruptWorkflowWatchdogPause = false
190188
// DefaultESAnalyzerTimeWindow controls how many days to go back for ElasticSearch Analyzer
191189
DefaultESAnalyzerTimeWindow = time.Hour * 24 * 30
192190
// DefaultESAnalyzerMaxNumDomains controls how many domains to check

common/dynamicconfig/constants.go

Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1707,12 +1707,7 @@ const (
17071707
// Default value: false
17081708
// Allowed filters: N/A
17091709
EnableESAnalyzer
1710-
// EnableWatchDog decides whether to enable watchdog system worker
1711-
// KeyName: system.enableWatchdog
1712-
// Value type: Bool
1713-
// Default value: false
1714-
// Allowed filters: N/A
1715-
EnableWatchDog
1710+
17161711
// EnableStickyQuery indicates if sticky query should be enabled per domain
17171712
// KeyName: system.enableStickyQuery
17181713
// Value type: Bool
@@ -1805,12 +1800,6 @@ const (
18051800
// Default value: false
18061801
ESAnalyzerEnableAvgDurationBasedChecks
18071802

1808-
// CorruptWorkflowWatchdogPause defines if we want to dynamically pause the watchdog workflow
1809-
// KeyName: worker.CorruptWorkflowWatchdogPause
1810-
// Value type: bool
1811-
// Default value: false
1812-
CorruptWorkflowWatchdogPause
1813-
18141803
// Lockdown defines if we want to allow failovers of domains to this cluster
18151804
// KeyName: system.Lockdown
18161805
// Value type: bool
@@ -3822,11 +3811,6 @@ var BoolKeys = map[BoolKey]DynamicBool{
38223811
Description: "EnableESAnalyzer decides whether to enable system workers for processing ElasticSearch Analyzer",
38233812
DefaultValue: false,
38243813
},
3825-
EnableWatchDog: DynamicBool{
3826-
KeyName: "system.EnableWatchDog",
3827-
Description: "EnableWatchDog decides whether to enable watchdog system worker",
3828-
DefaultValue: false,
3829-
},
38303814
EnableStickyQuery: DynamicBool{
38313815
KeyName: "system.enableStickyQuery",
38323816
Description: "EnableStickyQuery is indicates if sticky query should be enabled per domain",
@@ -3912,11 +3896,6 @@ var BoolKeys = map[BoolKey]DynamicBool{
39123896
Description: "ESAnalyzerEnableAvgDurationBasedChecks controls if we want to enable avg duration based task refreshes",
39133897
DefaultValue: false,
39143898
},
3915-
CorruptWorkflowWatchdogPause: DynamicBool{
3916-
KeyName: "worker.CorruptWorkflowWatchdogPause",
3917-
Description: "CorruptWorkflowWatchdogPause defines if we want to dynamically pause the watchdog workflow",
3918-
DefaultValue: false,
3919-
},
39203899
Lockdown: DynamicBool{
39213900
KeyName: "system.Lockdown",
39223901
Description: "Lockdown defines if we want to allow failovers of domains to this cluster",

common/metrics/defs.go

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1255,8 +1255,6 @@ const (
12551255
CheckDataCorruptionWorkflowScope
12561256
// ESAnalyzerScope is scope used by ElasticSearch Analyzer (esanalyzer) workflow
12571257
ESAnalyzerScope
1258-
// WatchDogScope is scope used by WatchDog workflow
1259-
WatchDogScope
12601258

12611259
NumWorkerScopes
12621260
)
@@ -1826,7 +1824,6 @@ var ScopeDefs = map[ServiceIdx]map[int]scopeDefinition{
18261824
BatcherScope: {operation: "batcher"},
18271825
ParentClosePolicyProcessorScope: {operation: "ParentClosePolicyProcessor"},
18281826
ESAnalyzerScope: {operation: "ESAnalyzer"},
1829-
WatchDogScope: {operation: "WatchDog"},
18301827
},
18311828
}
18321829

@@ -2417,9 +2414,6 @@ const (
24172414
ESAnalyzerNumStuckWorkflowsRefreshed
24182415
ESAnalyzerNumStuckWorkflowsFailedToRefresh
24192416
ESAnalyzerNumLongRunningWorkflows
2420-
WatchDogNumDeletedCorruptWorkflows
2421-
WatchDogNumFailedToDeleteCorruptWorkflows
2422-
WatchDogNumCorruptWorkflowProcessed
24232417

24242418
NumWorkerMetrics
24252419
)
@@ -3017,9 +3011,6 @@ var MetricDefs = map[ServiceIdx]map[int]metricDefinition{
30173011
ESAnalyzerNumStuckWorkflowsRefreshed: {metricName: "es_analyzer_num_stuck_workflows_refreshed", metricType: Counter},
30183012
ESAnalyzerNumStuckWorkflowsFailedToRefresh: {metricName: "es_analyzer_num_stuck_workflows_failed_to_refresh", metricType: Counter},
30193013
ESAnalyzerNumLongRunningWorkflows: {metricName: "es_analyzer_num_long_running_workflows", metricType: Counter},
3020-
WatchDogNumDeletedCorruptWorkflows: {metricName: "watchdog_num_deleted_corrupt_workflows", metricType: Counter},
3021-
WatchDogNumFailedToDeleteCorruptWorkflows: {metricName: "watchdog_num_failed_to_delete_corrupt_workflows", metricType: Counter},
3022-
WatchDogNumCorruptWorkflowProcessed: {metricName: "watchdog_num_corrupt_workflows_processed", metricType: Counter},
30233014
},
30243015
}
30253016

service/frontend/adminHandler.go

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -388,8 +388,6 @@ func (adh *adminHandlerImpl) MaintainCorruptWorkflow(
388388
tag.WorkflowRunID(request.GetExecution().GetRunID()),
389389
)
390390

391-
scope := adh.GetMetricsClient().Scope(metrics.WatchDogScope)
392-
tagged := scope.Tagged(metrics.DomainTag(request.Domain))
393391
resp := &types.AdminMaintainWorkflowResponse{
394392
HistoryDeleted: false,
395393
ExecutionsDeleted: false,
@@ -416,11 +414,6 @@ func (adh *adminHandlerImpl) MaintainCorruptWorkflow(
416414
logger.Info(fmt.Sprintf("Will delete workflow because (%v) returned corrupted error (%#v)",
417415
functionName, err))
418416
resp, err = adh.DeleteWorkflow(ctx, request)
419-
if err == nil {
420-
tagged.AddCounter(metrics.WatchDogNumDeletedCorruptWorkflows, 1)
421-
} else {
422-
tagged.AddCounter(metrics.WatchDogNumFailedToDeleteCorruptWorkflows, 1)
423-
}
424417
return resp, nil
425418
}
426419
}

service/worker/service.go

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ import (
4949
"github.com/uber/cadence/service/worker/scanner/tasklist"
5050
"github.com/uber/cadence/service/worker/scanner/timers"
5151
"github.com/uber/cadence/service/worker/shadower"
52-
"github.com/uber/cadence/service/worker/watchdog"
5352
)
5453

5554
type (
@@ -73,7 +72,6 @@ type (
7372
ScannerCfg *scanner.Config
7473
BatcherCfg *batcher.Config
7574
ESAnalyzerCfg *esanalyzer.Config
76-
WatchdogConfig *watchdog.Config
7775
failoverManagerCfg *failovermanager.Config
7876
ThrottledLogRPS dynamicconfig.IntPropertyFn
7977
PersistenceGlobalMaxQPS dynamicconfig.IntPropertyFn
@@ -176,14 +174,10 @@ func NewConfig(params *resource.Params) *Config {
176174
ESAnalyzerWorkflowVersionDomains: dc.GetStringProperty(dynamicconfig.ESAnalyzerWorkflowVersionMetricDomains),
177175
ESAnalyzerWorkflowTypeDomains: dc.GetStringProperty(dynamicconfig.ESAnalyzerWorkflowTypeMetricDomains),
178176
},
179-
WatchdogConfig: &watchdog.Config{
180-
CorruptWorkflowWatchdogPause: dc.GetBoolProperty(dynamicconfig.CorruptWorkflowWatchdogPause),
181-
},
182177
EnableBatcher: dc.GetBoolProperty(dynamicconfig.EnableBatcher),
183178
EnableParentClosePolicyWorker: dc.GetBoolProperty(dynamicconfig.EnableParentClosePolicyWorker),
184179
NumParentClosePolicySystemWorkflows: dc.GetIntProperty(dynamicconfig.NumParentClosePolicySystemWorkflows),
185180
EnableESAnalyzer: dc.GetBoolProperty(dynamicconfig.EnableESAnalyzer),
186-
EnableWatchDog: dc.GetBoolProperty(dynamicconfig.EnableWatchDog),
187181
EnableFailoverManager: dc.GetBoolProperty(dynamicconfig.EnableFailoverManager),
188182
EnableWorkflowShadower: dc.GetBoolProperty(dynamicconfig.EnableWorkflowShadower),
189183
ThrottledLogRPS: dc.GetIntProperty(dynamicconfig.WorkerThrottledLogRPS),
@@ -242,9 +236,6 @@ func (s *Service) Start() {
242236
if s.config.EnableESAnalyzer() {
243237
s.startESAnalyzer()
244238
}
245-
if s.config.EnableWatchDog() {
246-
s.startWatchDog()
247-
}
248239
if s.config.EnableFailoverManager() {
249240
s.startFailoverManager()
250241
}
@@ -306,24 +297,6 @@ func (s *Service) startESAnalyzer() {
306297
}
307298
}
308299

309-
func (s *Service) startWatchDog() {
310-
watchdog := watchdog.New(
311-
s.params.PublicClient,
312-
s.GetFrontendClient(),
313-
s.GetClientBean(),
314-
s.GetLogger(),
315-
s.GetMetricsClient(),
316-
s.params.MetricScope,
317-
s.Resource,
318-
s.GetDomainCache(),
319-
s.config.WatchdogConfig,
320-
)
321-
322-
if err := watchdog.Start(); err != nil {
323-
s.GetLogger().Fatal("error starting watchdog", tag.Error(err))
324-
}
325-
}
326-
327300
func (s *Service) startBatcher() {
328301
params := &batcher.BootstrapParams{
329302
Config: *s.config.BatcherCfg,

service/worker/watchdog/client.go

Lines changed: 0 additions & 102 deletions
This file was deleted.

0 commit comments

Comments
 (0)