munin_docker_
· 19 KiB · Text
Sin formato
#!/usr/bin/env python3
"""
=head1 NAME
docker_ - Docker wildcard-plugin to monitor a L<Docker|https://www.docker.com> host.
This wildcard plugin provides series C<containers>, C<images>, C<status>,
C<volumes>, C<cpu>, C<memory> and C<network> as separate graphs. It also
supports a C<multi> suffix that provides all of those as a multigraph.
=head1 INSTALLATION
- Copy this plugin in your munin plugins directory
- Install Python3 "docker" package
=over 2
If you want all the graphs as a multigraph, create a single multi symlink.
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_multi
Or choose a subset of those you want.
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_containers
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_cpu
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_images
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_memory
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_network
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_status
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_volumes
=back
After the installation you need to restart your munin-node:
=over 2
systemctl restart munin-node
=back
=head1 CONFIGURATION
This plugin needs to run as root; you need to create a file named docker placed in the
directory /etc/munin/plugin-conf.d/ with the following config (you can also use
Docker environment variables here as described in
https://docs.docker.com/compose/reference/envvars/):
You can use the EXCLUDE_CONTAINER_NAME environment variable to specify a regular expression
which, if matched, will exclude the matching containers from all container graphs (status, containers, cpu, memory and network).
For example
env.EXCLUDE_CONTAINER_NAME runner
Would exclude all containers with the word "runner" in the name.
=over 2
[docker_*]
group docker
env.DOCKER_HOST unix://run/docker.sock
env.EXCLUDE_CONTAINER_NAME regexp
=back
You may need to pick a different group depending on the name schema of your
distribution. Or maybe use "user root", if nothing else works.
=head1 AUTHORS
This section has been reverse-engineered from git logs
Codimp <[email protected]>: original rewrite
Rowan Wookey <[email protected]>: performance improvement
Olivier Mehani <[email protected]>: Network support, ClientWrapper, general cleanup, multigraph
=head1 MAGIC MARKERS
#%# family=auto
#%# capabilities=autoconf suggest multigraph
=cut
"""
import os
import sys
import re
try:
from functools import cached_property
except ImportError:
# If cached_property is not available,
# just use the property decorator, without caching
# This is for backward compatibility with Python<3.8
cached_property = property
from multiprocessing import Process, Queue
def sorted_by_creation_date(func):
    """
    Decorate ``func`` so that the iterable it returns comes back as a sorted list.

    Each item is ordered by ``attrs['CreatedAt']`` when that key is present in
    the item's ``attrs`` mapping, and by ``attrs['Created']`` otherwise.

    The returned wrapper accepts and forwards any positional and keyword
    arguments, and keeps the name and docstring of ``func``.
    """
    # Local import: the module header only pulls cached_property out of
    # functools, so wraps is brought into scope here.
    from functools import wraps

    def creation_date(item):
        attrs = item.attrs
        if 'CreatedAt' in attrs:
            return attrs['CreatedAt']
        return attrs['Created']

    @wraps(func)
    def sorted_func(*args, **kwargs):
        return sorted(func(*args, **kwargs), key=creation_date)

    return sorted_func
def clean_fieldname(text):
    """
    Turn ``text`` into a string usable as a munin field name.

    A leading character that is not a letter or underscore, and any other
    character that is not a letter, digit or underscore, is replaced by ``_``.
    The literal name ``root`` is mapped to ``_root``.
    """
    # "root" is a magic (forbidden) word, so it gets a dedicated replacement.
    if text != "root":
        return re.sub(r"(^[^A-Za-z_]|[^A-Za-z0-9_])", "_", text)
    return "_root"
class ClientWrapper:
    """
    Thin facade over a docker client.

    It keeps the listing and filtering logic used by the graphs in one place
    and exposes every listing as a ``cached_property`` (a plain ``property``
    when the module-level fallback is active), sorted by creation date.

    When ``exclude_re`` is given and non-empty, every container whose name is
    matched by that regular expression is left out of ``all_containers``.
    """
    client = None
    exclude = None

    def __init__(self, client, exclude_re=None):
        self.client = client
        # Without a pattern the class-level default (None) stays in effect.
        if exclude_re:
            self.exclude = re.compile(exclude_re)

    @property
    def api(self):
        """The ``api`` attribute of the wrapped client."""
        return self.client.api

    @cached_property
    @sorted_by_creation_date
    def all_containers(self):
        """Containers with status 'running' whose name the exclusion RE does not match."""
        selected = []
        for candidate in self.client.containers.list(all=True):
            if candidate.status != 'running':
                continue
            if self.exclude and self.exclude.search(candidate.name):
                continue
            selected.append(candidate)
        return selected

    @cached_property
    @sorted_by_creation_date
    def intermediate_images(self):
        """Entries of ``all_images`` that are not in (``images`` minus ``dangling_images``)."""
        everything = set(self.all_images)
        listed = set(self.images)
        dangling = set(self.dangling_images)
        return list(everything - (listed - dangling))

    @cached_property
    @sorted_by_creation_date
    def all_images(self):
        """Result of ``client.images.list(all=True)``."""
        return self.client.images.list(all=True)

    @cached_property
    @sorted_by_creation_date
    def images(self):
        """Default image listing with the dangling images removed."""
        listed = set(self.client.images.list())
        return list(listed - set(self.dangling_images))

    @cached_property
    @sorted_by_creation_date
    def dangling_images(self):
        """Images returned by the listing with the ``dangling`` filter set."""
        return self.client.images.list(filters={'dangling': True})

    @cached_property
    @sorted_by_creation_date
    def volumes(self):
        """Result of ``client.volumes.list()``."""
        return self.client.volumes.list()
def container_summary(container, *args):
    """
    Return the container name, followed by its attributes in parentheses.

    The attributes come from ``container_attributes(container, *args)``; when
    that string is empty only the name is returned.
    """
    name = container.name
    details = container_attributes(container, *args)
    return f'{name} ({details})' if details else name
def container_attributes(container, *args):
    """
    Describe a container as a comma-separated string.

    The string lists the tags of the container's image, then the container's
    ``attrs['Created']`` value, then any extra strings passed in ``args``.

    A new list is built for the result, so the list handed out by
    ``container.image.tags`` is never modified.
    """
    attributes = [*container.image.tags, container.attrs['Created'], *args]
    return ', '.join(attributes)
def print_containers_status(client):
    """
    Print the ``<state>.value`` / ``<state>.extinfo`` pairs of the status graph.

    Every container of ``client.all_containers`` is sorted into one bucket per
    state.  A container whose status is 'running' is inspected through
    ``client.api`` and counted as 'unhealthy' when its health status says so.
    Containers with any other, unlisted status are ignored.
    """
    # States taken over directly from container.status.
    direct_states = ('paused', 'created', 'restarting', 'removing', 'exited', 'dead')
    # Insertion order of this dict is the order of the printed fields.
    buckets = {state: [] for state in ('running', 'unhealthy') + direct_states}

    for container in client.all_containers:
        current = container.status
        if current == 'running':
            state = client.api.inspect_container(container.name)['State']
            is_unhealthy = state.get('Health', {}).get('Status') == 'unhealthy'
            buckets['unhealthy' if is_unhealthy else 'running'].append(container)
        elif current in direct_states:
            buckets[current].append(container)

    for state_name, members in buckets.items():
        print(f'{state_name}.value', len(members))
        print(f'{state_name}.extinfo', ', '.join(container_summary(c) for c in members))
def image_summary(image):
    """
    Describe an image as ``<short_id> (<tags>, <created>, <size> MiB)``.

    The size is ``attrs['Size']`` divided by 1024**2 and rounded to two
    decimals; the creation value is ``attrs['Created']``.

    A new list is built for the result, so the list handed out by
    ``image.tags`` is never modified.
    """
    size_mib = round(image.attrs['Size']/1024**2, 2)
    attributes = [*image.tags, image.attrs['Created'], f"{size_mib} MiB"]
    return f"{image.short_id} ({', '.join(attributes)})"
def print_images_count(client):
    """
    Print count and summary lines for the three image groups.

    The groups are read from ``client.images``, ``client.intermediate_images``
    and ``client.dangling_images`` and printed in the order intermediate,
    images, dangling.
    """
    # Read the listings in this order before anything is printed.
    plain = client.images
    intermediate = client.intermediate_images
    dangling = client.dangling_images

    groups = (
        ('intermediate_quantity', intermediate),
        ('images_quantity', plain),
        ('dangling_quantity', dangling),
    )
    for field, members in groups:
        print(f'{field}.value', len(members))
        print(f'{field}.extinfo', ', '.join(image_summary(i) for i in members))
def get_container_stats(container, q):
    """Take one non-streaming stats snapshot of ``container`` and put it on ``q``."""
    snapshot = container.stats(stream=False)
    q.put(snapshot)
def parallel_container_stats(client):
    """
    Collect the stats of every container in ``client.all_containers`` in parallel.

    One process with its own queue is started per container, each running
    ``get_container_stats``.  The return value is the ``items()`` view of a
    dict mapping each container to the stats object its process produced.
    """
    workers = []
    for container in client.all_containers:
        q = Queue()
        p = Process(target=get_container_stats, args=(container, q))
        workers.append((container, p, q))
        p.start()

    stats = {}
    for container, p, q in workers:
        # Read the result before joining: a process that has put data on a
        # queue does not exit until that data has been flushed to the pipe,
        # so joining first can deadlock on a large payload.
        stats[container] = q.get()
        p.join()
    return stats.items()
def print_containers_cpu(client):
    """
    Print one CPU usage value and one extinfo line per container.

    The percentage is the container's CPU time delta divided by the system
    CPU time delta between the two samples of a stats object, scaled by 100
    and by ``os.cpu_count()``.  It stays 0.0 when the system delta is not
    positive.
    """
    for container, stats in parallel_container_stats(client):
        usage_now = float(stats["cpu_stats"]["cpu_usage"]["total_usage"])
        usage_before = float(stats["precpu_stats"]["cpu_usage"]["total_usage"])
        cpu_delta = usage_now - usage_before

        system_now = float(stats["cpu_stats"]["system_cpu_usage"])
        system_before = float(stats["precpu_stats"]["system_cpu_usage"])
        system_delta = system_now - system_before

        if system_delta > 0.0:
            cpu_percent = cpu_delta / system_delta * 100.0 * os.cpu_count()
        else:
            cpu_percent = 0.0

        field = clean_fieldname(container.name)
        print(f'{field}.value', cpu_percent)
        print(f'{field}.extinfo', container_attributes(container))
def print_containers_memory(client):
    """
    Print one memory usage value and one extinfo line per container.

    ``memory_stats.stats.total_rss`` is reported when the stats contain it;
    otherwise ``memory_stats.usage`` is used.  The extinfo names which of the
    two figures was reported.
    """
    for container, stats in parallel_container_stats(client):
        details = stats['memory_stats']['stats']
        if 'total_rss' in details:  # cgroupv1 only?
            memory_usage, extinfo = details['total_rss'], 'Resident Set Size'
        else:
            memory_usage, extinfo = stats['memory_stats']['usage'], 'Total memory usage'
        field = clean_fieldname(container.name)
        print(f'{field}.value', memory_usage)
        print(f'{field}.extinfo', container_attributes(container, extinfo))
def print_containers_network(client):
    """
    Print transmitted and received byte counters per container.

    The counters are the sums of ``tx_bytes`` and ``rx_bytes`` over all
    entries of the stats' ``networks`` mapping; both are 0 when the stats
    carry no such mapping.
    """
    for container, stats in parallel_container_stats(client):
        sent = received = 0
        for interface in stats.get('networks', {}).values():
            sent += interface['tx_bytes']
            received += interface['rx_bytes']
        field = clean_fieldname(container.name)
        print(f'{field}_up.value', sent)
        print(f'{field}_down.value', received)
        print(f'{field}_up.extinfo', container_attributes(container))
def volume_summary(volume):
    """
    Return the volume's short id, followed by its labels in parentheses.

    The labels are read from ``attrs['Labels']`` and joined with ', '; the
    parenthesised part is left out when that value is empty or None.
    """
    short = f"{volume.short_id}"
    labels = volume.attrs['Labels']
    return f"{short} ({', '.join(labels)})" if labels else short
def status(client, mode):
    """
    Serve the ``status`` series.

    With ``mode == "config"`` the static graph definition is printed;
    for any other mode the current values are printed by
    ``print_containers_status``.
    """
    if mode != "config":
        print_containers_status(client)
        return

    # One entry per output line; adjacent literals form a single line.
    config_lines = (
        "graph_title Docker status",
        "graph_vlabel containers",
        "graph_category virtualization",
        "graph_total All containers",
        "running.label RUNNING",
        "running.draw AREASTACK",
        "running.info Running containers can be manipulated with "
        "`docker container [attach|kill|logs|pause|restart|stop] <NAME>` or "
        "commands run in them with `docker container exec "
        "[--detach|--interactive,--privileged,--tty] <NAME> <COMMAND>`",
        "unhealthy.label UNHEALTHY",
        "unhealthy.draw AREASTACK",
        "unhealthy.warning 1",
        "unhealthy.info Unhealthy containers can be restarted with "
        "`docker container restart <NAME>`",
        "paused.label PAUSED",
        "paused.draw AREASTACK",
        "paused.info Paused containers can be resumed with "
        "`docker container unpause <NAME>`",
        "created.label CREATED",
        "created.draw AREASTACK",
        "created.info New containers can be created with "
        "`docker container create --name <NAME> <IMAGE_ID >` or "
        "`docker container run --name <NAME> <IMAGE_ID> <COMMAND>`",
        "restarting.label RESTARTING",
        "restarting.draw AREASTACK",
        "restarting.info Containers can be restarted with "
        "`docker container restart <NAME>`",
        "removing.label REMOVING",
        "removing.draw AREASTACK",
        "removing.info Containers can be removed with "
        "`docker container rm <NAME>`",
        "exited.label EXITED",
        "exited.draw AREASTACK",
        "exited.info Exited containers can be started with "
        "`docker container start [--attach] <NAME>`",
        "dead.label DEAD",
        "dead.draw AREASTACK",
        "dead.warning 1",
        "dead.info Dead containers can be started with "
        "`docker container start <NAME>`",
    )
    for line in config_lines:
        print(line)
def containers(client, mode):
    """
    Serve the ``containers`` series.

    With ``mode == "config"`` the graph definition is printed; otherwise the
    number of entries in ``client.all_containers`` is printed.
    """
    if mode != "config":
        print('containers_quantity.value', len(client.all_containers))
        return

    for line in (
        "graph_title Docker containers",
        "graph_vlabel containers",
        "graph_category virtualization",
        "containers_quantity.label Containers",
    ):
        print(line)
def images(client, mode):
    """
    Serve the ``images`` series.

    With ``mode == "config"`` the static graph definition is printed;
    for any other mode the current values are printed by
    ``print_images_count``.
    """
    if mode == "config":
        print("graph_title Docker images")
        print("graph_vlabel images")
        print("graph_category virtualization")
        print("graph_total All images")
        print("intermediate_quantity.label Intermediate images")
        print("intermediate_quantity.draw AREASTACK")
        print("intermediate_quantity.info All unused images can be deleted with "
              "`docker image prune --all`")
        print("images_quantity.label Images")
        print("images_quantity.draw AREASTACK")
        print("images_quantity.info Images can be used in containers with "
              "`docker container create --name <NAME> <IMAGE_ID >` or "
              "`docker container run --name <NAME> <IMAGE_ID> <COMMAND>`")
        print("dangling_quantity.label Dangling images")
        print("dangling_quantity.draw AREASTACK")
        # The separating space before "or" is needed: these adjacent literals
        # are concatenated into one line of output.
        print("dangling_quantity.info Dangling images can be deleted with "
              "`docker image prune` "
              "or tagged with `docker image tag <IMAGE_ID> <NAME>`")
        print("dangling_quantity.warning 10")
    else:
        print_images_count(client)
def volumes(client, mode):
    """
    Serve the ``volumes`` series.

    With ``mode == "config"`` the graph definition is printed; otherwise the
    number of entries in ``client.volumes`` and a summary of each are printed.
    """
    if mode != "config":
        print('volumes_quantity.value', len(client.volumes))
        summaries = ', '.join(volume_summary(v) for v in client.volumes)
        print('volumes_quantity.extinfo', summaries)
        return

    for line in (
        "graph_title Docker volumes",
        "graph_vlabel volumes",
        "graph_category virtualization",
        "volumes_quantity.label Volumes",
        "volumes_quantity.draw AREASTACK",
        "volumes_quantity.info Unused volumes can be deleted with "
        "`docker volume prune`",
    ):
        print(line)
def cpu(client, mode):
    """
    Serve the ``cpu`` series.

    With ``mode == "config"`` the graph definition is printed, with an upper
    limit of 100 times ``os.cpu_count()`` and one stacked field per container
    of ``client.all_containers``; otherwise the current values are printed by
    ``print_containers_cpu``.
    """
    if mode != "config":
        print_containers_cpu(client)
        return

    upper_limit = str(os.cpu_count() * 100)
    print("graph_title Docker containers CPU usage")
    print(f"graph_args --base 1000 -r --lower-limit 0 --upper-limit {upper_limit}")
    print("graph_scale no")
    print("graph_period second")
    print("graph_vlabel CPU usage (%)")
    print("graph_category virtualization")
    print("graph_info This graph shows docker container CPU usage.")
    print("graph_total Total CPU usage")

    for container in client.all_containers:
        field = clean_fieldname(container.name)
        print(f"{field}.label {container.name}")
        print(f"{field}.draw AREASTACK")
        print(f"{field}.info {container_attributes(container)}")
def network(client, mode):
    """
    Serve the ``network`` series.

    With ``mode == "config"`` the graph definition is printed, with a hidden
    ``<name>_down`` field and a visible ``<name>_up`` field per container of
    ``client.all_containers``; the up field names the down field as its
    negative.  Otherwise the current values are printed by
    ``print_containers_network``.
    """
    if mode != "config":
        print_containers_network(client)
        return

    print("graph_title Docker containers network usage")
    print("graph_args --base 1024 -l 0")
    # "${graph_period}" is emitted literally; it is not a Python placeholder.
    print("graph_vlabel bits in (-) / out (+) per ${graph_period}")
    print("graph_category virtualization")
    print("graph_info This graph shows docker container network usage.")
    print("graph_total Total network usage")

    for container in client.all_containers:
        field = clean_fieldname(container.name)
        down, up = f"{field}_down", f"{field}_up"
        print(f"{down}.label {container.name}_received")
        print(f"{down}.type DERIVE")
        print(f"{down}.min 0")
        print(f"{down}.graph no")
        print(f"{down}.cdef {down},8,*")
        print(f"{up}.label {container.name}")
        print(f"{up}.draw LINESTACK1")
        print(f"{up}.type DERIVE")
        print(f"{up}.min 0")
        print(f"{up}.negative {down}")
        print(f"{up}.cdef {up},8,*")
        print(f"{up}.info {container_attributes(container)}")
def memory(client, mode):
    """
    Serve the ``memory`` series.

    With ``mode == "config"`` the graph definition is printed, with one
    stacked field per container of ``client.all_containers``; otherwise the
    current values are printed by ``print_containers_memory``.
    """
    if mode != "config":
        print_containers_memory(client)
        return

    for line in (
        "graph_title Docker containers memory usage",
        "graph_args --base 1024 -l 0",
        "graph_vlabel Bytes",
        "graph_category virtualization",
        "graph_info This graph shows docker container memory usage.",
        "graph_total Total memory usage",
    ):
        print(line)

    for container in client.all_containers:
        field = clean_fieldname(container.name)
        print(f"{field}.label {container.name}")
        print(f"{field}.draw AREASTACK")
        print(f"{field}.info {container_attributes(container)}")
def main():
    """
    Entry point of the plugin.

    The mode is the first command-line argument ("" when absent).  The series
    is derived from the file name the plugin was invoked as: the text after
    "docker_", up to the next underscore.

    - ``autoconf``: prints "yes" when the docker client can be created and
      pinged, "no (<reason>)" otherwise, and exits with status 0.
    - ``suggest``: prints "multi" and exits with status 0.
    - any other mode: runs the selected series, or all of them as a
      multigraph for ``multi``.

    Exits with status 1 when the docker client cannot be set up outside of
    autoconf, or when the series is unknown.
    """
    series = [
        'containers',
        'cpu',
        'images',
        'memory',
        'network',
        'status',
        'volumes',
    ]

    try:
        mode = sys.argv[1]
    except IndexError:
        mode = ""

    # Only the file name is inspected, so a "docker_" in a parent directory
    # cannot be mistaken for the plugin prefix.  partition() yields an empty
    # suffix when the prefix is missing, instead of raising like an index
    # into the result of split() would.
    plugin_name = os.path.basename(sys.argv[0])
    wildcard = plugin_name.partition("docker_")[2].split("_")[0]

    try:
        import docker
        client = docker.from_env()
        if mode == "autoconf":
            client.ping()
            print('yes')
            sys.exit(0)
    except Exception as e:
        # sys.exit() above raises SystemExit, which is not an Exception
        # subclass and therefore passes through this handler.
        print(f'no ({e})')
        if mode == "autoconf":
            sys.exit(0)
        sys.exit(1)

    if mode == "suggest":
        # The multigraph covers all other graphs,
        # so we only need to suggest one
        print("multi")
        sys.exit(0)

    client = ClientWrapper(client,
                           exclude_re=os.getenv('EXCLUDE_CONTAINER_NAME'))

    if wildcard in series:
        # dereference the function name by looking in the globals()
        # this assumes that the function name matches the series name exactly
        # if this were to change, a different approach would be needed,
        # most likely using a Dict of series name string to callable
        globals()[wildcard](client, mode)
    elif wildcard == 'multi':
        for s in series:
            print(f'multigraph docker_{s}')
            # ditto
            globals()[s](client, mode)
    else:
        print(f'unknown series ({wildcard})', file=sys.stderr)
        sys.exit(1)
if __name__ == '__main__':
main()
| 1 | #!/usr/bin/env python3 |
| 2 | """ |
| 3 | =head1 NAME |
| 4 | |
| 5 | docker_ - Docker wildcard-plugin to monitor a L<Docker|https://www.docker.com> host. |
| 6 | |
| 7 | This wildcard plugin provides series C<containers>, C<images>, C<status>, |
| 8 | C<volumes>, C<cpu>, C<memory> and C<network> as separate graphs. It also |
| 9 | supports a C<multi> suffix that provides all of those as a multigraph. |
| 10 | |
| 11 | =head1 INSTALLATION |
| 12 | |
| 13 | - Copy this plugin in your munin plugins directory |
| 14 | - Install Python3 "docker" package |
| 15 | |
| 16 | =over 2 |
| 17 | |
| 18 | If you want all the graphs as a multigraph, create a single multi symlink. |
| 19 | |
| 20 | ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_multi |
| 21 | |
| 22 | Or choose a subset of those you want. |
| 23 | |
| 24 | ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_containers |
| 25 | ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_cpu |
| 26 | ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_images |
| 27 | ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_memory |
| 28 | ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_network |
| 29 | ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_status |
| 30 | ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_volumes |
| 31 | |
| 32 | =back |
| 33 | |
| 34 | After the installation you need to restart your munin-node: |
| 35 | |
| 36 | =over 2 |
| 37 | |
| 38 | systemctl restart munin-node |
| 39 | |
| 40 | =back |
| 41 | |
| 42 | =head1 CONFIGURATION |
| 43 | |
| 44 | This plugin needs to run as root; you need to create a file named docker placed in the |
| 45 | directory /etc/munin/plugin-conf.d/ with the following config (you can also use |
| 46 | Docker environment variables here as described in |
| 47 | https://docs.docker.com/compose/reference/envvars/): |
| 48 | |
| 49 | You can use the EXCLUDE_CONTAINER_NAME environment variable to specify a regular expression |
| 50 | which, if matched, will exclude the matching containers from all container graphs (status, containers, cpu, memory and network). |
| 51 | |
| 52 | For example |
| 53 | |
| 54 | env.EXCLUDE_CONTAINER_NAME runner |
| 55 | |
| 56 | Would exclude all containers with the word "runner" in the name. |
| 57 | |
| 58 | |
| 59 | =over 2 |
| 60 | |
| 61 | [docker_*] |
| 62 | group docker |
| 63 | env.DOCKER_HOST unix://run/docker.sock |
| 64 | env.EXCLUDE_CONTAINER_NAME regexp |
| 65 | |
| 66 | =back |
| 67 | |
| 68 | You may need to pick a different group depending on the name schema of your |
| 69 | distribution. Or maybe use "user root", if nothing else works. |
| 70 | |
| 71 | =head1 AUTHORS |
| 72 | |
| 73 | This section has been reverse-engineered from git logs |
| 74 | |
| 75 | Codimp <[email protected]>: original rewrite |
| 76 | |
| 77 | Rowan Wookey <[email protected]>: performance improvement |
| 78 | |
| 79 | Olivier Mehani <[email protected]>: Network support, ClientWrapper, general cleanup, multigraph |
| 80 | |
| 81 | =head1 MAGIC MARKERS |
| 82 | |
| 83 | #%# family=auto |
| 84 | #%# capabilities=autoconf suggest multigraph |
| 85 | |
| 86 | =cut |
| 87 | """ |
| 88 | |
| 89 | import os |
| 90 | import sys |
| 91 | import re |
| 92 | try: |
| 93 | from functools import cached_property |
| 94 | except ImportError: |
| 95 | # If cached_property is not available, |
| 96 | # just use the property decorator, without caching |
| 97 | # This is for backward compatibility with Python<3.8 |
| 98 | cached_property = property |
| 99 | from multiprocessing import Process, Queue |
| 100 | |
| 101 | |
| 102 | def sorted_by_creation_date(func): |
| 103 | def sorted_func(*args, **kwargs): |
| 104 | return sorted( |
| 105 | func(*args, **kwargs), |
| 106 | key=( |
| 107 | lambda x: x.attrs['CreatedAt'] |
| 108 | if 'CreatedAt' in x.attrs |
| 109 | else x.attrs['Created'] |
| 110 | ) |
| 111 | ) |
| 112 | return sorted_func |
| 113 | |
| 114 | |
| 115 | def clean_fieldname(text): |
| 116 | if text == "root": |
| 117 | # "root" is a magic (forbidden) word |
| 118 | return "_root" |
| 119 | else: |
| 120 | return re.sub(r"(^[^A-Za-z_]|[^A-Za-z0-9_])", "_", text) |
| 121 | |
| 122 | |
| 123 | class ClientWrapper: |
| 124 | """ |
| 125 | A small wrapper for the docker client, to centralise some parsing logic, |
| 126 | and support caching. |
| 127 | |
| 128 | In addition, when the exclude_re parameter is not None, |
| 129 | any container whose name is matched by the RE will be excluded from reports. |
| 130 | """ |
| 131 | client = None |
| 132 | exclude = None |
| 133 | |
| 134 | def __init__(self, client, exclude_re=None): |
| 135 | self.client = client |
| 136 | if exclude_re: |
| 137 | self.exclude = re.compile(exclude_re) |
| 138 | |
| 139 | @property |
| 140 | def api(self): |
| 141 | return self.client.api |
| 142 | |
| 143 | @cached_property |
| 144 | @sorted_by_creation_date |
| 145 | def all_containers(self): |
| 146 | return [ |
| 147 | c for c in self.client.containers.list(all=True) |
| 148 | if (c.status == 'running') and (not self.exclude or not self.exclude.search(c.name)) |
| 149 | ] |
| 150 | |
| 151 | @cached_property |
| 152 | @sorted_by_creation_date |
| 153 | def intermediate_images(self): |
| 154 | return list( |
| 155 | set(self.all_images) |
| 156 | .difference( |
| 157 | set(self.images) |
| 158 | .difference( |
| 159 | set(self.dangling_images) |
| 160 | ) |
| 161 | ) |
| 162 | ) |
| 163 | |
| 164 | @cached_property |
| 165 | @sorted_by_creation_date |
| 166 | def all_images(self): |
| 167 | return self.client.images.list(all=True) |
| 168 | |
| 169 | @cached_property |
| 170 | @sorted_by_creation_date |
| 171 | def images(self): |
| 172 | images = self.client.images.list() |
| 173 | return list( |
| 174 | set(images) |
| 175 | .difference( |
| 176 | set(self.dangling_images)) |
| 177 | ) |
| 178 | |
| 179 | @cached_property |
| 180 | @sorted_by_creation_date |
| 181 | def dangling_images(self): |
| 182 | return self.client.images.list(filters={'dangling': True}) |
| 183 | |
| 184 | @cached_property |
| 185 | @sorted_by_creation_date |
| 186 | def volumes(self): |
| 187 | return self.client.volumes.list() |
| 188 | |
| 189 | |
| 190 | def container_summary(container, *args): |
| 191 | summary = container.name |
| 192 | attributes = container_attributes(container, *args) |
| 193 | if attributes: |
| 194 | summary += f' ({attributes})' |
| 195 | return summary |
| 196 | |
| 197 | |
| 198 | def container_attributes(container, *args): |
| 199 | attributes = container.image.tags |
| 200 | attributes.append(container.attrs['Created']) |
| 201 | return ', '.join(attributes + list(args)) |
| 202 | |
| 203 | |
| 204 | def print_containers_status(client): |
| 205 | running = [] |
| 206 | unhealthy = [] |
| 207 | paused = [] |
| 208 | created = [] |
| 209 | restarting = [] |
| 210 | removing = [] |
| 211 | exited = [] |
| 212 | dead = [] |
| 213 | for container in client.all_containers: |
| 214 | if container.status == 'running': |
| 215 | state = client.api.inspect_container(container.name)['State'] |
| 216 | if state.get('Health', {}).get('Status') == 'unhealthy': |
| 217 | unhealthy.append(container) |
| 218 | else: |
| 219 | running.append(container) |
| 220 | elif container.status == 'paused': |
| 221 | paused.append(container) |
| 222 | elif container.status == 'created': |
| 223 | created.append(container) |
| 224 | elif container.status == 'restarting': |
| 225 | restarting.append(container) |
| 226 | elif container.status == 'removing': |
| 227 | removing.append(container) |
| 228 | elif container.status == 'exited': |
| 229 | exited.append(container) |
| 230 | elif container.status == 'dead': |
| 231 | dead.append(container) |
| 232 | print('running.value', len(running)) |
| 233 | print('running.extinfo', ', '.join(container_summary(c) for c in running)) |
| 234 | print('unhealthy.value', len(unhealthy)) |
| 235 | print('unhealthy.extinfo', ', '.join(container_summary(c) for c in unhealthy)) |
| 236 | print('paused.value', len(paused)) |
| 237 | print('paused.extinfo', ', '.join(container_summary(c) for c in paused)) |
| 238 | print('created.value', len(created)) |
| 239 | print('created.extinfo', ', '.join(container_summary(c) for c in created)) |
| 240 | print('restarting.value', len(restarting)) |
| 241 | print('restarting.extinfo', ', '.join(container_summary(c) for c in restarting)) |
| 242 | print('removing.value', len(removing)) |
| 243 | print('removing.extinfo', ', '.join(container_summary(c) for c in removing)) |
| 244 | print('exited.value', len(exited)) |
| 245 | print('exited.extinfo', ', '.join(container_summary(c) for c in exited)) |
| 246 | print('dead.value', len(dead)) |
| 247 | print('dead.extinfo', ', '.join(container_summary(c) for c in dead)) |
| 248 | |
| 249 | |
| 250 | def image_summary(image): |
| 251 | attributes = image.tags |
| 252 | attributes.append(image.attrs['Created']) |
| 253 | attributes.append(f"{round(image.attrs['Size']/1024**2, 2)} MiB") |
| 254 | return f"{image.short_id} ({', '.join(attributes)})" |
| 255 | |
| 256 | |
| 257 | def print_images_count(client): |
| 258 | images = client.images |
| 259 | intermediate = client.intermediate_images |
| 260 | dangling = client.dangling_images |
| 261 | |
| 262 | print('intermediate_quantity.value', len(intermediate)) |
| 263 | print('intermediate_quantity.extinfo', ', '.join(image_summary(i) for i in intermediate)) |
| 264 | print('images_quantity.value', len(images)) |
| 265 | print('images_quantity.extinfo', ', '.join(image_summary(i) for i in images)) |
| 266 | print('dangling_quantity.value', len(dangling)) |
| 267 | print('dangling_quantity.extinfo', ', '.join(image_summary(i) for i in dangling)) |
| 268 | |
| 269 | |
| 270 | def get_container_stats(container, q): |
| 271 | q.put(container.stats(stream=False)) |
| 272 | |
| 273 | |
| 274 | def parallel_container_stats(client): |
| 275 | proc_list = [] |
| 276 | stats = {} |
| 277 | for container in client.all_containers: |
| 278 | q = Queue() |
| 279 | p = Process(target=get_container_stats, args=(container, q)) |
| 280 | proc_list.append({'proc': p, 'queue': q, 'container': container}) |
| 281 | p.start() |
| 282 | for proc in proc_list: |
| 283 | proc['proc'].join() |
| 284 | stats[proc['container']] = proc['queue'].get() |
| 285 | return stats.items() |
| 286 | |
| 287 | |
| 288 | def print_containers_cpu(client): |
| 289 | for container, stats in parallel_container_stats(client): |
| 290 | cpu_percent = 0.0 |
| 291 | cpu_delta = (float(stats["cpu_stats"]["cpu_usage"]["total_usage"]) |
| 292 | - float(stats["precpu_stats"]["cpu_usage"]["total_usage"])) |
| 293 | system_delta = (float(stats["cpu_stats"]["system_cpu_usage"]) |
| 294 | - float(stats["precpu_stats"]["system_cpu_usage"])) |
| 295 | if system_delta > 0.0: |
| 296 | cpu_percent = cpu_delta / system_delta * 100.0 * os.cpu_count() |
| 297 | clean_container_name = clean_fieldname(container.name) |
| 298 | print(clean_container_name + '.value', cpu_percent) |
| 299 | print(clean_container_name + '.extinfo', container_attributes(container)) |
| 300 | |
| 301 | |
| 302 | def print_containers_memory(client): |
| 303 | for container, stats in parallel_container_stats(client): |
| 304 | if 'total_rss' in stats['memory_stats']['stats']: # cgroupv1 only? |
| 305 | memory_usage = stats['memory_stats']['stats']['total_rss'] |
| 306 | extinfo = 'Resident Set Size' |
| 307 | else: |
| 308 | memory_usage = stats['memory_stats']['usage'] |
| 309 | extinfo = 'Total memory usage' |
| 310 | clean_container_name = clean_fieldname(container.name) |
| 311 | print(clean_container_name + '.value', memory_usage) |
| 312 | print(clean_container_name + '.extinfo', container_attributes(container, extinfo)) |
| 313 | |
| 314 | |
| 315 | def print_containers_network(client): |
| 316 | for container, stats in parallel_container_stats(client): |
| 317 | tx_bytes = 0 |
| 318 | rx_bytes = 0 |
| 319 | if "networks" in stats: |
| 320 | for data in stats['networks'].values(): |
| 321 | tx_bytes += data['tx_bytes'] |
| 322 | rx_bytes += data['rx_bytes'] |
| 323 | clean_container_name = clean_fieldname(container.name) |
| 324 | print(clean_container_name + '_up.value', tx_bytes) |
| 325 | print(clean_container_name + '_down.value', rx_bytes) |
| 326 | print(clean_container_name + '_up.extinfo', container_attributes(container)) |
| 327 | |
| 328 | |
| 329 | def volume_summary(volume): |
| 330 | summary = f"{volume.short_id}" |
| 331 | if volume.attrs['Labels']: |
| 332 | summary += f" ({', '.join(volume.attrs['Labels'])})" |
| 333 | return summary |
| 334 | |
| 335 | |
| 336 | def status(client, mode): |
| 337 | if mode == "config": |
| 338 | print("graph_title Docker status") |
| 339 | print("graph_vlabel containers") |
| 340 | print("graph_category virtualization") |
| 341 | print("graph_total All containers") |
| 342 | print("running.label RUNNING") |
| 343 | print("running.draw AREASTACK") |
| 344 | print("running.info Running containers can be manipulated with " |
| 345 | "`docker container [attach|kill|logs|pause|restart|stop] <NAME>` or " |
| 346 | "commands run in them with `docker container exec " |
| 347 | "[--detach|--interactive,--privileged,--tty] <NAME> <COMMAND>`" |
| 348 | ) |
| 349 | print("unhealthy.label UNHEALTHY") |
| 350 | print("unhealthy.draw AREASTACK") |
| 351 | print("unhealthy.warning 1") |
| 352 | print("unhealthy.info Unhealthy containers can be restarted with " |
| 353 | "`docker container restart <NAME>`") |
| 354 | print("paused.label PAUSED") |
| 355 | print("paused.draw AREASTACK") |
| 356 | print("paused.info Paused containers can be resumed with " |
| 357 | "`docker container unpause <NAME>`") |
| 358 | print("created.label CREATED") |
| 359 | print("created.draw AREASTACK") |
| 360 | print("created.info New containers can be created with " |
| 361 | "`docker container create --name <NAME> <IMAGE_ID >` or " |
| 362 | "`docker container run --name <NAME> <IMAGE_ID> <COMMAND>`") |
| 363 | print("restarting.label RESTARTING") |
| 364 | print("restarting.draw AREASTACK") |
| 365 | print("restarting.info Containers can be restarted with " |
| 366 | "`docker container restart <NAME>`") |
| 367 | print("removing.label REMOVING") |
| 368 | print("removing.draw AREASTACK") |
| 369 | print("removing.info Containers can be removed with " |
| 370 | "`docker container rm <NAME>`") |
| 371 | print("exited.label EXITED") |
| 372 | print("exited.draw AREASTACK") |
| 373 | print("exited.info Exited containers can be started with " |
| 374 | "`docker container start [--attach] <NAME>`") |
| 375 | print("dead.label DEAD") |
| 376 | print("dead.draw AREASTACK") |
| 377 | print("dead.warning 1") |
| 378 | print("dead.info Dead containers can be started with " |
| 379 | "`docker container start <NAME>`") |
| 380 | else: |
| 381 | print_containers_status(client) |
| 382 | |
| 383 | |
| 384 | def containers(client, mode): |
| 385 | if mode == "config": |
| 386 | print("graph_title Docker containers") |
| 387 | print("graph_vlabel containers") |
| 388 | print("graph_category virtualization") |
| 389 | print("containers_quantity.label Containers") |
| 390 | else: |
| 391 | print('containers_quantity.value', len(client.all_containers)) |
| 392 | |
| 393 | |
def images(client, mode):
    """Emit the munin "images" graph (intermediate, tagged and dangling).

    When ``mode`` is ``"config"`` the graph definition is printed: three
    stacked fields plus a warning threshold of 10 on dangling images. For
    any other mode the values are delegated to ``print_images_count``.
    """
    if mode == "config":
        print("graph_title Docker images")
        print("graph_vlabel images")
        print("graph_category virtualization")
        print("graph_total All images")
        print("intermediate_quantity.label Intermediate images")
        print("intermediate_quantity.draw AREASTACK")
        print("intermediate_quantity.info All unused images can be deleted with "
              "`docker image prune --all`")
        print("images_quantity.label Images")
        print("images_quantity.draw AREASTACK")
        print("images_quantity.info Images can be used in containers with "
              "`docker container create --name <NAME> <IMAGE_ID >` or "
              "`docker container run --name <NAME> <IMAGE_ID> <COMMAND>`")
        print("dangling_quantity.label Dangling images")
        print("dangling_quantity.draw AREASTACK")
        # The trailing space after the first command matters: these adjacent
        # literals are concatenated, and without it the text would read
        # "prune`or tagged".
        print("dangling_quantity.info Dangling images can be deleted with "
              "`docker image prune` "
              "or tagged with `docker image tag <IMAGE_ID> <NAME>`")
        print("dangling_quantity.warning 10")
    else:
        print_images_count(client)
| 417 | |
| 418 | |
| 419 | def volumes(client, mode): |
| 420 | if mode == "config": |
| 421 | print("graph_title Docker volumes") |
| 422 | print("graph_vlabel volumes") |
| 423 | print("graph_category virtualization") |
| 424 | print("volumes_quantity.label Volumes") |
| 425 | print("volumes_quantity.draw AREASTACK") |
| 426 | print("volumes_quantity.info Unused volumes can be deleted with " |
| 427 | "`docker volume prune`") |
| 428 | else: |
| 429 | print('volumes_quantity.value', len(client.volumes)) |
| 430 | print('volumes_quantity.extinfo', ', '.join(volume_summary(v) for v in client.volumes)) |
| 431 | |
| 432 | |
def cpu(client, mode):
    """Emit the munin "cpu" graph (per-container CPU usage in percent).

    When ``mode`` is ``"config"`` the graph definition is printed, with one
    stacked field per entry in ``client.all_containers``. The graph's upper
    limit is 100 per CPU reported by ``os.cpu_count()``. For any other mode
    the values are delegated to ``print_containers_cpu``.
    """
    if mode == "config":
        # os.cpu_count() returns None when the number of CPUs cannot be
        # determined; fall back to a single CPU instead of crashing on
        # ``None * 100``.
        graphlimit = str((os.cpu_count() or 1) * 100)
        print("graph_title Docker containers CPU usage")
        print("graph_args --base 1000 -r --lower-limit 0 --upper-limit " + graphlimit)
        print("graph_scale no")
        print("graph_period second")
        print("graph_vlabel CPU usage (%)")
        print("graph_category virtualization")
        print("graph_info This graph shows docker container CPU usage.")
        print("graph_total Total CPU usage")
        for container in client.all_containers:
            fieldname = clean_fieldname(container.name)
            print("{}.label {}".format(fieldname, container.name))
            print("{}.draw AREASTACK".format(fieldname))
            print("{}.info {}".format(fieldname, container_attributes(container)))
    else:
        print_containers_cpu(client)
| 451 | |
| 452 | |
def network(client, mode):
    """Emit the munin "network" graph (per-container traffic in bits).

    When ``mode`` is ``"config"`` the graph definition is printed. Every
    entry in ``client.all_containers`` gets a ``<field>_down`` and a
    ``<field>_up`` DERIVE field, each multiplied by 8 through a cdef. The
    down field is not graphed on its own; it is attached to the up field
    as its ``negative``. For any other mode the values are delegated to
    ``print_containers_network``.
    """
    if mode != "config":
        print_containers_network(client)
        return

    header = (
        "graph_title Docker containers network usage",
        "graph_args --base 1024 -l 0",
        "graph_vlabel bits in (-) / out (+) per ${graph_period}",
        "graph_category virtualization",
        "graph_info This graph shows docker container network usage.",
        "graph_total Total network usage",
    )
    for line in header:
        print(line)

    for container in client.all_containers:
        name = container.name
        field = clean_fieldname(name)
        down = f"{field}_down"
        up = f"{field}_up"

        # Received traffic: hidden, drawn as the negative of the up field.
        print(f"{down}.label {name}_received")
        print(f"{down}.type DERIVE")
        print(f"{down}.min 0")
        print(f"{down}.graph no")
        print(f"{down}.cdef {down},8,*")

        # Sent traffic: the visible field carrying the container details.
        print(f"{up}.label {name}")
        print(f"{up}.draw LINESTACK1")
        print(f"{up}.type DERIVE")
        print(f"{up}.min 0")
        print(f"{up}.negative {down}")
        print(f"{up}.cdef {up},8,*")
        print(f"{up}.info {container_attributes(container)}")
| 477 | |
| 478 | |
def memory(client, mode):
    """Emit the munin "memory" graph (per-container memory usage in bytes).

    When ``mode`` is ``"config"`` the graph definition is printed, with one
    stacked field per entry in ``client.all_containers``. For any other
    mode the values are delegated to ``print_containers_memory``.
    """
    if mode != "config":
        print_containers_memory(client)
        return

    header = (
        "graph_title Docker containers memory usage",
        "graph_args --base 1024 -l 0",
        "graph_vlabel Bytes",
        "graph_category virtualization",
        "graph_info This graph shows docker container memory usage.",
        "graph_total Total memory usage",
    )
    for line in header:
        print(line)

    for container in client.all_containers:
        field = clean_fieldname(container.name)
        print(f"{field}.label {container.name}")
        print(f"{field}.draw AREASTACK")
        print(f"{field}.info {container_attributes(container)}")
| 494 | |
| 495 | |
def main():
    """Run the plugin for the series encoded in the script's file name.

    The first command-line argument is the munin mode (empty when absent):

    * ``autoconf`` - ping the Docker daemon and print ``yes``, or print
      ``no (<error>)`` on failure; exits with status 0 either way.
    * ``suggest`` - print ``multi`` and exit with status 0.
    * anything else (including ``config``) - passed on to the series
      function(s) selected by the file name.

    The series is the text after ``docker_`` in the script's file name, up
    to the next underscore. ``multi`` emits every series as a multigraph.
    An unknown series is reported on stderr and exits with status 1, as
    does a failure to import or connect to Docker outside ``autoconf``.
    """
    series = [
        'containers',
        'cpu',
        'images',
        'memory',
        'network',
        'status',
        'volumes',
    ]

    try:
        mode = sys.argv[1]
    except IndexError:
        mode = ""

    # Parse only the file name, not the whole path: a parent directory that
    # happens to contain "docker_" must not be mistaken for the series.
    # partition() yields "" when the prefix is missing, so a misnamed
    # symlink ends in the "unknown series" error instead of an IndexError.
    script_name = os.path.basename(sys.argv[0])
    wildcard = script_name.partition("docker_")[2].split("_")[0]

    try:
        # Imported here so a missing "docker" package is reported through
        # the same "no (...)" path as an unreachable daemon.
        import docker
        client = docker.from_env()
        if mode == "autoconf":
            client.ping()
            print('yes')
            sys.exit(0)
    except Exception as e:
        print(f'no ({e})')
        if mode == "autoconf":
            sys.exit(0)
        sys.exit(1)

    if mode == "suggest":
        # The multigraph covers all other graphs,
        # so we only need to suggest one
        print("multi")
        sys.exit(0)

    client = ClientWrapper(client,
                           exclude_re=os.getenv('EXCLUDE_CONTAINER_NAME'))

    if wildcard in series:
        # dereference the function name by looking in the globals()
        # this assumes that the function name matches the series name exactly
        # if this were to change, a different approach would be needed,
        # most likely using a Dict of series name string to callable
        globals()[wildcard](client, mode)
    elif wildcard == 'multi':
        for s in series:
            print(f'multigraph docker_{s}')
            # ditto
            globals()[s](client, mode)
    else:
        print(f'unknown series ({wildcard})', file=sys.stderr)
        sys.exit(1)
| 549 | |
| 550 | |
# Run the plugin only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
| 553 | |
| 554 |