Skip to content

Commit 6f9c81a

Browse files
author
Alexander Popov
committed
feat: add new plugin MemoryLeakDiagnostic (prototype)
1 parent 94cf15b commit 6f9c81a

File tree

3 files changed

+182
-0
lines changed

3 files changed

+182
-0
lines changed

mamonsu/plugins/pgsql/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@
66
__all__ += ['archive_command']
77
__all__ += ['prepared_transaction']
88
__all__ += ['relations_size']
9+
__all__ += ['memory_leak_diagnostic']
910

1011
from . import *
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
from mamonsu.plugins.pgsql.plugin import PgsqlPlugin as Plugin
2+
import os
3+
from .pool import Pooler
4+
import logging
5+
import re
6+
7+
8+
class MemoryLeakDiagnostic(Plugin):
    """Count PostgreSQL backends whose private anonymous memory is too large.

    On every run the plugin takes the pids listed in ``pg_stat_activity`` and
    inspects the matching ``/proc/<pid>`` entries:

    * on kernels older than 4.5 (except CentOS 7) it reads
      ``/proc/<pid>/statm`` and uses ``RES - SHR`` as an approximation;
    * otherwise it reads ``RssAnon`` from ``/proc/<pid>/status``.

    It sends two values: the number of pids above the configured
    ``private_anon_mem_threshold`` and a text message describing each of them.
    """

    DEFAULT_CONFIG = {'enabled': 'False',
                      'private_anon_mem_threshold': '1GB'}
    Interval = 60

    query = 'select pid from pg_stat_activity'
    key_count_diff = 'pgsql.memory_leak_diagnostic.count_diff[]'
    key_count_diff_error = 'pgsql.memory_leak_diagnostic.msg_text[]'
    name_count_diff = 'PostgreSQL: number of pids which private anonymous memory exceeds ' \
                      'private_anon_mem_threshold'
    name_count_diff_error = 'PostgreSQL: number of pids which private anonymous memory ' \
                            'exceeds private_anon_mem_threshold, text of message'

    # Byte multipliers for the units accepted in private_anon_mem_threshold.
    _UNIT_RATIOS = {'MB': 1024 ** 2, 'GB': 1024 ** 3, 'TB': 1024 ** 4}
    # Fields of /proc/<pid>/status that are reported (values are given in kB).
    _STATUS_FIELDS = ('VmRSS', 'RssAnon', 'RssFile', 'RssShmem')

    def __init__(self, config):
        """Read the plugin configuration and detect the kernel / distribution.

        The plugin disables itself (after logging an error) when the threshold
        cannot be parsed or ``/etc/os-release`` cannot be read.

        :param config: configuration object, passed through to the base class.
        """
        # NOTE(review): ``Plugin`` is the imported PgsqlPlugin alias, so this
        # starts the MRO lookup *after* PgsqlPlugin. Kept as in the original;
        # confirm that skipping PgsqlPlugin.__init__ (if any) is intended.
        super(Plugin, self).__init__(config)

        # Safe defaults so that run() never hits a missing attribute.
        self.diff = 0
        self.os_release = ''
        self.os_name = ''
        self.os_version = ''

        if not self.is_enabled():
            return

        self.page_size = os.sysconf('SC_PAGE_SIZE')

        threshold = self._parse_threshold(self.plugin_config('private_anon_mem_threshold'))
        if threshold is None:
            logging.error('Error in config, section [{section}], parameter private_anon_mem_threshold. '
                          'Possible values MB, GB, TB. For example 1GB.'
                          .format(section=self.__class__.__name__.lower()))
            self.disable()
            return
        self.diff = threshold

        self.os_release = os.uname().release
        os_release_file = '/etc/os-release'
        try:
            with open(os_release_file, 'r') as release_file:
                release_lines = release_file.readlines()
        except Exception as e:
            logging.error(f'Cannot read file {os_release_file} : {e}')
            self.disable()
            # Nothing to parse: stop here instead of iterating an undefined name.
            return

        self.os_name, self.os_version = self._parse_os_release(release_lines)

    @classmethod
    def _parse_threshold(cls, raw_value):
        """Convert a value such as ``'1GB'`` to bytes; return None if invalid."""
        match = re.fullmatch(r'\s*([0-9]+)\s*(MB|GB|TB)\s*', str(raw_value), re.I)
        if match is None:
            return None
        number, unit = match.groups()
        return int(number) * cls._UNIT_RATIOS[unit.upper()]

    @staticmethod
    def _parse_os_release(lines):
        """Return ``(ID, VERSION_ID)`` from os-release lines ('' when absent)."""
        os_name = ''
        os_version = ''
        for line in lines:
            key, separator, value = line.partition('=')
            if not separator:
                # Blank lines and comments carry no KEY=VALUE pair.
                continue
            key = key.strip()
            value = value.strip().strip('"\'')
            if key == 'ID':
                os_name = value
            elif key == 'VERSION_ID':
                os_version = value
        return os_name, os_version

    def _use_statm(self):
        """Tell whether RssAnon is unavailable and statm must be used instead.

        RssAnon is expected on kernels >= 4.5 and on CentOS 7. When the kernel
        release string cannot be parsed, statm is chosen as the fallback.
        """
        match = re.match(r'(\d+)\.(\d+)', self.os_release)
        if match is None:
            return True
        kernel = (int(match.group(1)), int(match.group(2)))
        is_centos7 = self.os_name == 'centos' and self.os_version == '7'
        return kernel < (4, 5) and not is_centos7

    def _collect_from_statm(self, pids):
        """Return one dict per pid whose ``RES - SHR`` exceeds the threshold."""
        diffs = []
        for pid in pids:
            try:
                with open(f'/proc/{pid}/statm', 'r') as statm_file:
                    statm = statm_file.read().split()
            except (FileNotFoundError, ProcessLookupError):
                # The backend exited between the query and the read.
                continue
            if len(statm) < 3:
                continue

            # statm reports sizes in pages.
            RES = int(statm[1]) * self.page_size
            SHR = int(statm[2]) * self.page_size
            if RES - SHR > self.diff:
                diffs.append({'pid': pid, 'RES': RES, 'SHR': SHR, 'diff': self.diff})
        return diffs

    def _collect_from_status(self, pids):
        """Return one dict per pid whose ``RssAnon`` exceeds the threshold."""
        diffs = []
        for pid in pids:
            try:
                with open(f'/proc/{pid}/status', 'r') as status_file:
                    status_lines = status_file.readlines()
            except (FileNotFoundError, ProcessLookupError):
                # The backend exited between the query and the read.
                continue

            # Initialised once per pid so values survive until the comparison.
            values = dict.fromkeys(self._STATUS_FIELDS, 0)
            for line in status_lines:
                key, separator, value = line.partition(':')
                if not separator or key not in values:
                    continue
                parts = value.split()
                if parts and parts[0].isdigit():
                    values[key] = int(parts[0]) * 1024

            if values['RssAnon'] > self.diff:
                diffs.append({'pid': pid, **values, 'diff': self.diff})
        return diffs

    def run(self, zbx):
        """Inspect every backend pid and send the count and the message text.

        :param zbx: sender object; its ``send(key, value)`` method is called
            once for the count and once for the message text.
        """
        pids = [row[0] for row in Pooler.query(query=self.query)]

        if self._use_statm():
            diffs = self._collect_from_statm(pids)
            line_format = 'pid: {pid}, RES {RES} - SHR {SHR} more then {diff}\n'
        else:
            diffs = self._collect_from_status(pids)
            line_format = 'pid: {pid}, RssAnon {RssAnon} more then {diff}, VmRSS {VmRSS}, ' \
                          'RssFile {RssFile}, RssShmem {RssShmem} \n'

        msg_text = ''.join(line_format.format_map(diff) for diff in diffs)

        zbx.send(self.key_count_diff, len(diffs))
        zbx.send(self.key_count_diff_error, msg_text)

    def items(self, template):
        """Build the two template items: the counter and the text message."""
        result = template.item(
            {
                'name': self.name_count_diff,
                'key': self.key_count_diff,
                'delay': self.plugin_config('interval')
            }
        )
        result += template.item(
            {
                'name': self.name_count_diff_error,
                'key': self.key_count_diff_error,
                'delay': self.plugin_config('interval'),
                'value_type': Plugin.VALUE_TYPE.text
            }
        )
        return result

    def graphs(self, template):
        """Build a graph showing the counter item."""
        result = template.graph(
            {
                'name': self.name_count_diff,
                'items': [
                    {
                        'key': self.key_count_diff,
                        'color': 'EEEEEE'
                    }
                ]
            }
        )
        return result

    def triggers(self, template):
        """Build a trigger on the length of the text message item."""
        result = template.trigger(
            {
                'name': self.name_count_diff + ' on {HOSTNAME}. {ITEM.LASTVALUE}',
                'expression': '{{#TEMPLATE:{name}.strlen()'
                              '}}&gt;1'.format(name=self.key_count_diff_error)
            })
        return result

packaging/conf/example.conf

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,3 +196,18 @@ enabled = False
196196
relations=postgres.pg_catalog.pg_class,postgres.pg_catalog.pg_user
197197
interval = 300
198198

199+
# This plugin detects possible memory leaks in PostgreSQL backends using /proc/pid/status and /proc/pid/statm
200+
# We use RES and SHR difference to calculate approximate volume of private anonymous backend memory.
201+
# If it exceeds private_anon_mem_threshold then that pid will be added to a message. An example is presented below
202+
# statm - 'pid: {pid}, RES {RES} - SHR {SHR} more then {private_anon_mem_threshold}\n'
203+
# Since Linux 4.5 RssAnon, RssFile and RssShmem have been added.
204+
# They make it possible to distinguish types of memory such as private anonymous, file-backed, and shared anonymous memory.
205+
# We are interested in RssAnon. If its value exceeds private_anon_mem_threshold then that pid will also be added to a message.
206+
# By default this plugin is disabled. To enable it, set "enabled = True" below.
207+
# interval - monitoring frequency in seconds. 60 seconds by default.
208+
# private_anon_mem_threshold - memory volume threshold above which a possible memory leak should be investigated. 1GB by default.
209+
# Possible values MB, GB, TB. For example 1GB
210+
[memoryleakdiagnostic]
211+
enabled = False
212+
interval = 60
213+
private_anon_mem_threshold = 1GB

0 commit comments

Comments
 (0)