Utoljára aktív 1 month ago

Revízió 8a1ca11954b99d81c108de2500bb1c85be341dcf

munin_docker_ Eredeti
1#!/usr/bin/env python3
2"""
3=head1 NAME
4
5docker_ - Docker wildcard-plugin to monitor a L<Docker|https://www.docker.com> host.
6
7This wildcard plugin provides series C<containers>, C<images>, C<status>,
8C<volumes>, C<cpu>, C<memory> and C<network> as separate graphs. It also
9supports a C<multi> suffix that provides all of those as a multigraph.
10
11=head1 INSTALLATION
12
- Copy this plugin into your munin plugins directory
14- Install Python3 "docker" package
15
16=over 2
17
18If you want all the graphs as a multigraph, create a single multi symlink.
19
20 ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_multi
21
22Or choose a subset of those you want.
23
24 ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_containers
25 ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_cpu
26 ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_images
27 ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_memory
28 ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_network
29 ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_status
30 ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_volumes
31
32=back
33
34After the installation you need to restart your munin-node:
35
36=over 2
37
38 systemctl restart munin-node
39
40=back
41
42=head1 CONFIGURATION
43
This plugin needs to run as root (or as a member of a group that is allowed to
access the Docker socket). To configure it, create a file named docker in the
directory /etc/munin/plugin-conf.d/ with the config shown below (you can also use
Docker environment variables here as described in
https://docs.docker.com/compose/reference/envvars/):
48
You can use the EXCLUDE_CONTAINER_NAME environment variable to specify a regular expression
which, if matched, will exclude the matching containers from all per-container graphs and counts.
51
52For example
53
54 env.EXCLUDE_CONTAINER_NAME runner
55
56Would exclude all containers with the word "runner" in the name.
57
58
59=over 2
60
61 [docker_*]
62 group docker
63 env.DOCKER_HOST unix://run/docker.sock
64 env.EXCLUDE_CONTAINER_NAME regexp
65
66=back
67
68You may need to pick a different group depending on the name schema of your
69distribution. Or maybe use "user root", if nothing else works.
70
71=head1 AUTHORS
72
73This section has been reverse-engineered from git logs
74
75Codimp <[email protected]>: original rewrite
76
77Rowan Wookey <[email protected]>: performance improvement
78
79Olivier Mehani <[email protected]>: Network support, ClientWrapper, general cleanup, multigraph
80
81=head1 MAGIC MARKERS
82
83 #%# family=auto
84 #%# capabilities=autoconf suggest multigraph
85
86=cut
87"""
88
89import os
90import sys
91import re
92try:
93 from functools import cached_property
94except ImportError:
95 # If cached_property is not available,
96 # just use the property decorator, without caching
97 # This is for backward compatibility with Python<3.8
98 cached_property = property
99from multiprocessing import Process, Queue
100
101
def sorted_by_creation_date(func):
    """
    Decorate ``func`` so that the iterable it returns comes back sorted.

    Every item returned by ``func`` must expose an ``attrs`` mapping. Items are
    ordered ascending by ``attrs['CreatedAt']`` when that key is present, and by
    ``attrs['Created']`` otherwise. The decorated function always returns a new
    list.
    """
    # Imported locally so the decorator does not depend on the module-level
    # import block.
    from functools import wraps

    def creation_key(item):
        attrs = item.attrs
        return attrs['CreatedAt'] if 'CreatedAt' in attrs else attrs['Created']

    # wraps() keeps the name and docstring of the decorated function intact
    @wraps(func)
    def sorted_func(*args, **kwargs):
        return sorted(func(*args, **kwargs), key=creation_key)

    return sorted_func
113
114
def clean_fieldname(text):
    """
    Turn ``text`` into a string usable as a Munin field name.

    A leading character that is not a letter or underscore, and any character
    that is not a letter, digit or underscore, is replaced by ``_``.
    """
    if text != "root":
        return re.sub(r"(^[^A-Za-z_]|[^A-Za-z0-9_])", "_", text)
    # "root" is a magic (forbidden) word
    return "_root"
121
122
class ClientWrapper:
    """
    A small wrapper for the docker client, to centralise some parsing logic,
    and support caching.

    In addition, when the exclude_re parameter is not None,
    any container whose name is matched by the RE will be excluded from reports.
    """
    # Wrapped docker client object; set by __init__
    client = None
    # Compiled exclusion pattern, or None when no exclusion was requested
    exclude = None

    def __init__(self, client, exclude_re=None):
        """
        Store ``client`` and compile ``exclude_re`` when it is non-empty.
        """
        self.client = client
        if exclude_re:
            self.exclude = re.compile(exclude_re)

    @property
    def api(self):
        """The ``api`` attribute of the wrapped client."""
        return self.client.api

    @cached_property
    @sorted_by_creation_date
    def all_containers(self):
        """
        Containers reported as 'running' whose name does not match the
        exclusion pattern, sorted by creation date.
        """
        return [
            c for c in self.client.containers.list(all=True)
            if (c.status == 'running') and (not self.exclude or not self.exclude.search(c.name))
        ]

    @cached_property
    @sorted_by_creation_date
    def intermediate_images(self):
        """
        Images that are neither in ``images`` nor in ``dangling_images``.

        Both sets are subtracted from ``all_images`` so that the three image
        series do not overlap (a dangling image is not counted twice).
        """
        return list(
            set(self.all_images)
            .difference(self.images, self.dangling_images)
        )

    @cached_property
    @sorted_by_creation_date
    def all_images(self):
        """Every image returned by ``images.list(all=True)``."""
        return self.client.images.list(all=True)

    @cached_property
    @sorted_by_creation_date
    def images(self):
        """Images from the default listing, minus the dangling ones."""
        images = self.client.images.list()
        return list(
            set(images)
            .difference(
                set(self.dangling_images))
        )

    @cached_property
    @sorted_by_creation_date
    def dangling_images(self):
        """Images returned when filtering on ``dangling``."""
        return self.client.images.list(filters={'dangling': True})

    @cached_property
    @sorted_by_creation_date
    def volumes(self):
        """All volumes known to the client, sorted by creation date."""
        return self.client.volumes.list()
188
189
def container_summary(container, *args):
    """
    Describe ``container`` as ``name (attributes)``.

    The attribute text comes from container_attributes(); when it is empty,
    only the container name is returned.
    """
    name = container.name
    details = container_attributes(container, *args)
    return f'{name} ({details})' if details else name
196
197
def container_attributes(container, *args):
    """
    Return a comma-separated description of ``container``.

    The description lists the tags of the container's image, the container's
    ``Created`` attribute, and then any extra strings passed in ``args``.
    """
    # Work on a copy: appending to the list handed out by the image object
    # would alter it for every later reader.
    attributes = list(container.image.tags)
    attributes.append(container.attrs['Created'])
    attributes.extend(args)
    return ', '.join(attributes)
202
203
def print_containers_status(client):
    """
    Print, for each container state, the number of containers in that state
    and a comma-separated summary of them.

    Containers reported as 'running' are looked up through the client API and
    counted as 'unhealthy' instead when their health status says so.
    """
    # States taken verbatim from the status reported by the container
    plain_states = ('paused', 'created', 'restarting', 'removing', 'exited', 'dead')
    # Insertion order is the output order
    buckets = {state: [] for state in ('running', 'unhealthy') + plain_states}

    for container in client.all_containers:
        reported = container.status
        if reported == 'running':
            state = client.api.inspect_container(container.name)['State']
            healthy = state.get('Health', {}).get('Status') != 'unhealthy'
            bucket = 'running' if healthy else 'unhealthy'
        elif reported in plain_states:
            bucket = reported
        else:
            # Any other status is not reported
            continue
        buckets[bucket].append(container)

    for state, members in buckets.items():
        print(f'{state}.value', len(members))
        print(f'{state}.extinfo', ', '.join(container_summary(c) for c in members))
248
249
def image_summary(image):
    """
    Describe ``image`` as ``short_id (tags, created, size)``.

    The size is the image's ``Size`` attribute converted from bytes to MiB
    and rounded to two decimals.
    """
    # Work on a copy so the list handed out by the image object is not altered
    attributes = list(image.tags)
    attributes.append(image.attrs['Created'])
    attributes.append(f"{round(image.attrs['Size']/1024**2, 2)} MiB")
    return f"{image.short_id} ({', '.join(attributes)})"
255
256
def print_images_count(client):
    """
    Print the count and a summary of the intermediate, regular and dangling
    images exposed by ``client``.
    """
    regular = client.images
    intermediate = client.intermediate_images
    dangling = client.dangling_images

    groups = (
        ('intermediate_quantity', intermediate),
        ('images_quantity', regular),
        ('dangling_quantity', dangling),
    )
    for field, members in groups:
        print(f'{field}.value', len(members))
        print(f'{field}.extinfo', ', '.join(image_summary(i) for i in members))
268
269
def get_container_stats(container, q):
    """
    Fetch one non-streamed stats sample for ``container`` and put it on ``q``.
    """
    sample = container.stats(stream=False)
    q.put(sample)
272
273
def parallel_container_stats(client):
    """
    Collect stats for every container in ``client.all_containers``.

    One process per container is started, each running get_container_stats()
    with its own queue, so the samples are fetched concurrently.

    Returns the ``items()`` view of a dict mapping each container to its stats.
    """
    workers = []
    for container in client.all_containers:
        q = Queue()
        p = Process(target=get_container_stats, args=(container, q))
        workers.append((container, p, q))
        p.start()

    stats = {}
    for container, p, q in workers:
        # Read the result before joining: a process that has put data on a
        # queue may not terminate until that data has been consumed, so
        # joining first can block forever on a large payload.
        stats[container] = q.get()
        p.join()
    return stats.items()
286
287
def print_containers_cpu(client):
    """
    Print the CPU usage percentage of each container, with its attributes.

    The percentage is the container's CPU time delta divided by the system
    CPU time delta between the two samples in the stats, scaled by the number
    of CPUs reported by os.cpu_count(). It is 0.0 when the system delta is
    not positive.
    """
    for container, stats in parallel_container_stats(client):
        container_delta = (
            float(stats["cpu_stats"]["cpu_usage"]["total_usage"])
            - float(stats["precpu_stats"]["cpu_usage"]["total_usage"])
        )
        host_delta = (
            float(stats["cpu_stats"]["system_cpu_usage"])
            - float(stats["precpu_stats"]["system_cpu_usage"])
        )
        if host_delta > 0.0:
            usage = container_delta / host_delta * 100.0 * os.cpu_count()
        else:
            usage = 0.0
        field = clean_fieldname(container.name)
        print(f'{field}.value', usage)
        print(f'{field}.extinfo', container_attributes(container))
300
301
def print_containers_memory(client):
    """
    Print the memory usage of each container, with its attributes.

    The ``total_rss`` statistic is reported when the stats provide it;
    otherwise the overall ``usage`` figure is reported. The extinfo line says
    which of the two was used.
    """
    for container, stats in parallel_container_stats(client):
        memory = stats['memory_stats']
        if 'total_rss' in memory['stats']:  # cgroupv1 only?
            usage, description = memory['stats']['total_rss'], 'Resident Set Size'
        else:
            usage, description = memory['usage'], 'Total memory usage'
        field = clean_fieldname(container.name)
        print(f'{field}.value', usage)
        print(f'{field}.extinfo', container_attributes(container, description))
313
314
def print_containers_network(client):
    """
    Print the transmitted and received byte counters of each container.

    The counters are summed over every entry of the ``networks`` section of
    the stats; both are 0 when that section is missing.
    """
    for container, stats in parallel_container_stats(client):
        sent = received = 0
        for interface in stats.get('networks', {}).values():
            sent += interface['tx_bytes']
            received += interface['rx_bytes']
        field = clean_fieldname(container.name)
        print(f'{field}_up.value', sent)
        print(f'{field}_down.value', received)
        print(f'{field}_up.extinfo', container_attributes(container))
327
328
def volume_summary(volume):
    """
    Describe ``volume`` by its short id, followed by its labels in
    parentheses when it has any.
    """
    labels = volume.attrs['Labels']
    if not labels:
        return f"{volume.short_id}"
    return f"{volume.short_id} ({', '.join(labels)})"
334
335
def status(client, mode):
    """
    Handle the ``status`` series.

    In "config" mode, print the graph definition; in any other mode, print
    the per-state container counts.
    """
    if mode != "config":
        print_containers_status(client)
        return

    print("graph_title Docker status")
    print("graph_vlabel containers")
    print("graph_category virtualization")
    print("graph_total All containers")

    # (field name, warning threshold or None, help text)
    fields = (
        ("running", None,
         "Running containers can be manipulated with "
         "`docker container [attach|kill|logs|pause|restart|stop] <NAME>` or "
         "commands run in them with `docker container exec "
         "[--detach|--interactive,--privileged,--tty] <NAME> <COMMAND>`"),
        ("unhealthy", "1",
         "Unhealthy containers can be restarted with "
         "`docker container restart <NAME>`"),
        ("paused", None,
         "Paused containers can be resumed with "
         "`docker container unpause <NAME>`"),
        ("created", None,
         "New containers can be created with "
         "`docker container create --name <NAME> <IMAGE_ID >` or "
         "`docker container run --name <NAME> <IMAGE_ID> <COMMAND>`"),
        ("restarting", None,
         "Containers can be restarted with "
         "`docker container restart <NAME>`"),
        ("removing", None,
         "Containers can be removed with "
         "`docker container rm <NAME>`"),
        ("exited", None,
         "Exited containers can be started with "
         "`docker container start [--attach] <NAME>`"),
        ("dead", "1",
         "Dead containers can be started with "
         "`docker container start <NAME>`"),
    )
    for name, warning, info in fields:
        print(f"{name}.label {name.upper()}")
        print(f"{name}.draw AREASTACK")
        if warning is not None:
            print(f"{name}.warning {warning}")
        print(f"{name}.info {info}")
382
383
def containers(client, mode):
    """
    Handle the ``containers`` series.

    In "config" mode, print the graph definition; in any other mode, print
    the number of containers exposed by ``client.all_containers``.
    """
    if mode != "config":
        print('containers_quantity.value', len(client.all_containers))
        return

    for line in (
        "graph_title Docker containers",
        "graph_vlabel containers",
        "graph_category virtualization",
        "containers_quantity.label Containers",
    ):
        print(line)
392
393
def images(client, mode):
    """
    Handle the ``images`` series.

    In "config" mode, print the graph definition; in any other mode, print
    the image counts.
    """
    if mode == "config":
        print("graph_title Docker images")
        print("graph_vlabel images")
        print("graph_category virtualization")
        print("graph_total All images")
        print("intermediate_quantity.label Intermediate images")
        print("intermediate_quantity.draw AREASTACK")
        print("intermediate_quantity.info All unused images can be deleted with "
              "`docker image prune --all`")
        print("images_quantity.label Images")
        print("images_quantity.draw AREASTACK")
        print("images_quantity.info Images can be used in containers with "
              "`docker container create --name <NAME> <IMAGE_ID >` or "
              "`docker container run --name <NAME> <IMAGE_ID> <COMMAND>`")
        print("dangling_quantity.label Dangling images")
        print("dangling_quantity.draw AREASTACK")
        # The separating space before "or" was missing, which fused the two
        # sentences of the help text together.
        print("dangling_quantity.info Dangling images can be deleted with "
              "`docker image prune` "
              "or tagged with `docker image tag <IMAGE_ID> <NAME>`")
        print("dangling_quantity.warning 10")
    else:
        print_images_count(client)
417
418
def volumes(client, mode):
    """
    Handle the ``volumes`` series.

    In "config" mode, print the graph definition; in any other mode, print
    the number of volumes and a summary of each.
    """
    if mode != "config":
        print('volumes_quantity.value', len(client.volumes))
        summaries = ', '.join(volume_summary(v) for v in client.volumes)
        print('volumes_quantity.extinfo', summaries)
        return

    for line in (
        "graph_title Docker volumes",
        "graph_vlabel volumes",
        "graph_category virtualization",
        "volumes_quantity.label Volumes",
        "volumes_quantity.draw AREASTACK",
        "volumes_quantity.info Unused volumes can be deleted with "
        "`docker volume prune`",
    ):
        print(line)
431
432
def cpu(client, mode):
    """
    Handle the ``cpu`` series.

    In "config" mode, print the graph definition with one stacked field per
    container; in any other mode, print the per-container CPU usage. The
    graph's upper limit is 100 times the CPU count from os.cpu_count().
    """
    if mode != "config":
        print_containers_cpu(client)
        return

    upper_limit = str(os.cpu_count() * 100)
    print("graph_title Docker containers CPU usage")
    print("graph_args --base 1000 -r --lower-limit 0 --upper-limit " + upper_limit)
    print("graph_scale no")
    print("graph_period second")
    print("graph_vlabel CPU usage (%)")
    print("graph_category virtualization")
    print("graph_info This graph shows docker container CPU usage.")
    print("graph_total Total CPU usage")
    for container in client.all_containers:
        field = clean_fieldname(container.name)
        print(f"{field}.label {container.name}")
        print(f"{field}.draw AREASTACK")
        print(f"{field}.info {container_attributes(container)}")
451
452
def network(client, mode):
    """
    Handle the ``network`` series.

    In "config" mode, print the graph definition with a hidden ``_down``
    field and a visible ``_up`` field per container, the former declared as
    the negative of the latter; in any other mode, print the per-container
    byte counters. Both fields carry a cdef multiplying the value by 8.
    """
    if mode != "config":
        print_containers_network(client)
        return

    print("graph_title Docker containers network usage")
    print("graph_args --base 1024 -l 0")
    print("graph_vlabel bits in (-) / out (+) per ${graph_period}")
    print("graph_category virtualization")
    print("graph_info This graph shows docker container network usage.")
    print("graph_total Total network usage")
    for container in client.all_containers:
        field = clean_fieldname(container.name)
        down = f"{field}_down"
        up = f"{field}_up"
        print(f"{down}.label {container.name}_received")
        print(f"{down}.type DERIVE")
        print(f"{down}.min 0")
        print(f"{down}.graph no")
        print(f"{down}.cdef {down},8,*")
        print(f"{up}.label {container.name}")
        print(f"{up}.draw LINESTACK1")
        print(f"{up}.type DERIVE")
        print(f"{up}.min 0")
        print(f"{up}.negative {down}")
        print(f"{up}.cdef {up},8,*")
        print(f"{up}.info {container_attributes(container)}")
477
478
def memory(client, mode):
    """
    Handle the ``memory`` series.

    In "config" mode, print the graph definition with one stacked field per
    container; in any other mode, print the per-container memory usage.
    """
    if mode != "config":
        print_containers_memory(client)
        return

    print("graph_title Docker containers memory usage")
    print("graph_args --base 1024 -l 0")
    print("graph_vlabel Bytes")
    print("graph_category virtualization")
    print("graph_info This graph shows docker container memory usage.")
    print("graph_total Total memory usage")
    for container in client.all_containers:
        field = clean_fieldname(container.name)
        print(f"{field}.label {container.name}")
        print(f"{field}.draw AREASTACK")
        print(f"{field}.info {container_attributes(container)}")
494
495
def main():
    """
    Entry point of the plugin.

    The mode is the first command-line argument ("autoconf", "suggest",
    "config", or anything else for fetching values). The series to report is
    taken from the name the plugin was invoked under: the part of the file
    name following ``docker_``, up to the next underscore.

    Exits with status 0 after answering "autoconf" or "suggest", and with
    status 1 when the docker client cannot be set up or the series is unknown.
    """
    try:
        mode = sys.argv[1]
    except IndexError:
        mode = ""

    # Only look at the file name: a "docker_" somewhere in the directory part
    # must not be mistaken for the plugin prefix. partition() yields an empty
    # wildcard instead of raising when the prefix is absent, so "autoconf"
    # still gets an answer in that case.
    plugin_name = os.path.basename(sys.argv[0])
    wildcard = plugin_name.partition("docker_")[2].split("_")[0]

    try:
        import docker
        client = docker.from_env()
        if mode == "autoconf":
            client.ping()
            print('yes')
            sys.exit(0)
    except Exception as e:
        print(f'no ({e})')
        if mode == "autoconf":
            sys.exit(0)
        sys.exit(1)

    if mode == "suggest":
        # The multigraph covers all other graphs,
        # so we only need to suggest one
        print("multi")
        sys.exit(0)

    client = ClientWrapper(client,
                           exclude_re=os.getenv('EXCLUDE_CONTAINER_NAME'))

    # Series name -> handler; the insertion order is the multigraph order
    series = {
        'containers': containers,
        'cpu': cpu,
        'images': images,
        'memory': memory,
        'network': network,
        'status': status,
        'volumes': volumes,
    }

    if wildcard in series:
        series[wildcard](client, mode)
    elif wildcard == 'multi':
        for name, handler in series.items():
            print(f'multigraph docker_{name}')
            handler(client, mode)
    else:
        print(f'unknown series ({wildcard})', file=sys.stderr)
        sys.exit(1)
549
550
# Run the plugin only when this file is executed as a script
if __name__ == '__main__':
    main()
553
554