Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ to another registry.

## Start DaemonSet
* Edit [node-problem-detector.yaml](https://github.com/kubernetes/node-problem-detector/blob/master/deployment/node-problem-detector.yaml) to fit your environment: Set `log` volume to your system log directory. (Used by SystemLogMonitor). For **kubernetes <1.9** use [node-problem-detector-old.yaml](https://github.com/kubernetes/node-problem-detector/blob/master/deployment/node-problem-detector-old.yaml)
* Create the DaemonSet with `kubectl create -f node-problem-detector.yaml`
* If needed, you can use [ConfigMap](https://kubernetes.io/docs/tasks/configure-pod-container/configure-pod-configmap/)
to overwrite the `config/`.
to overwrite the `config/`, Edit [node-problem-detector-config.yaml](https://github.com/kubernetes/node-problem-detector/blob/master/deployment/node-problem-detector-config.yaml) to fit your environment. and create the ConfigMap with `kubectl create -f node-problem-detector-config.yaml`.
* Create the DaemonSet with `kubectl create -f node-problem-detector.yaml`.

## Start Standalone
To run node-problem-detector standalone, you should set `inClusterConfig` to `false` and
Expand Down
82 changes: 82 additions & 0 deletions deployment/node-problem-detector-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
apiVersion: v1
data:
kernel-monitor.json: |
{
"plugin": "journald",
"pluginConfig": {
"source": "kernel"
},
"logPath": "/var/log/journal",
"lookback": "5m",
"bufferSize": 10,
"source": "kernel-monitor",
"conditions": [
{
"type": "KernelDeadlock",
"reason": "KernelHasNoDeadlock",
"message": "kernel has no deadlock"
}
],
"rules": [
{
"type": "temporary",
"reason": "OOMKilling",
"pattern": "Kill process \\d+ (.+) score \\d+ or sacrifice child\\nKilled process \\d+ (.+) total-vm:\\d+kB, anon-rss:\\d+kB, file-rss:\\d+kB.*"
},
{
"type": "temporary",
"reason": "TaskHung",
"pattern": "task \\S+:\\w+ blocked for more than \\w+ seconds\\."
},
{
"type": "temporary",
"reason": "UnregisterNetDevice",
"pattern": "unregister_netdevice: waiting for \\w+ to become free. Usage count = \\d+"
},
{
"type": "temporary",
"reason": "KernelOops",
"pattern": "BUG: unable to handle kernel NULL pointer dereference at .*"
},
{
"type": "temporary",
"reason": "KernelOops",
"pattern": "divide error: 0000 \\[#\\d+\\] SMP"
},
{
"type": "permanent",
"condition": "KernelDeadlock",
"reason": "AUFSUmountHung",
"pattern": "task umount\\.aufs:\\w+ blocked for more than \\w+ seconds\\."
},
{
"type": "permanent",
"condition": "KernelDeadlock",
"reason": "DockerHung",
"pattern": "task docker:\\w+ blocked for more than \\w+ seconds\\."
}
]
}
docker-monitor.json: |
{
"plugin": "journald",
"pluginConfig": {
"source": "docker"
},
"logPath": "/var/log/journal",
"lookback": "5m",
"bufferSize": 10,
"source": "docker-monitor",
"conditions": [],
"rules": [
{
"type": "temporary",
"reason": "CorruptDockerImage",
"pattern": "Error trying v2 registry: failed to register layer: rename /var/lib/docker/image/(.+) /var/lib/docker/image/(.+): directory not empty.*"
}
]
}
kind: ConfigMap
metadata:
name: node-problem-detector-config
namespace: default
9 changes: 7 additions & 2 deletions deployment/node-problem-detector.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ spec:
command:
- /node-problem-detector
- --logtostderr
- --kernel-monitor=/config/kernel-monitor.json
image: k8s.gcr.io/node-problem-detector:v0.2
- --system-log-monitors=/config/kernel-monitor.json,/config/docker-monitor.json
image: k8s.gcr.io/node-problem-detector:v0.5.0
imagePullPolicy: Always
securityContext:
privileged: true
Expand Down Expand Up @@ -55,3 +55,8 @@ spec:
- name: config
configMap:
name: node-problem-detector-config
items:
- key: kernel-monitor.json
path: kernel-monitor.json
- key: docker-monitor.json
path: docker-monitor.json