check_syncrepl_extended/check_syncrepl_extended

634 lines
21 KiB
Python
Executable File

#!/usr/bin/env python3
#
# Script to check LDAP syncrepl replication state between two servers.
# One server is considered the provider and the other the consumer.
#
# This script can check the replication state with two methods:
# - with the first, the entryCSN of all entries of the LDAP directory is
# compared between the two servers
# - with the second, all values of all attributes of all entries are
# compared between the two servers.
#
# In all cases, the contextCSN of the servers is compared and entries not
# present on the consumer or on the provider are reported. You can
# disable the contextCSN verification with the --no-check-contextCSN argument.
#
# This script is also able to "touch" an LDAP object on the provider to force
# synchronisation of this object. This mechanism consists of adding the
# '%%TOUCH%%' value to an attribute of this object and removing it just after.
# The touched attribute is specified with the --touch parameter. Of course, the
# DN and password provided must have write access to this attribute.
#
# If you prefer, you can use the --replace-touch parameter to replace the value
# of the touched attribute instead of adding the touch value. Useful in
# the case of a single-valued attribute.
#
# This script can be used as a Nagios plugin (-n argument)
#
# Requirement:
# A single DN/password pair able to connect to both servers
# and to retrieve objects from them without restriction.
#
# Author: Benjamin Renard <brenard@easter-eggs.com>
# Source: https://gitea.zionetrix.net/bn8/check_syncrepl_extended
#
import argparse
import getpass
import logging
import sys
import ldap
from ldap import LDAPError # pylint: disable=no-name-in-module
from ldap import modlist
from ldap.controls import SimplePagedResultsControl
VERSION = "0.0"

# Marker value temporarily written to an attribute to force its resynchronisation.
TOUCH_VALUE = b"%%TOUCH%%"

# argparse expands help strings with the "%" operator, so literal percent signs
# must be doubled; otherwise the help would show "%TOUCH%" instead of the value
# that is really written to the directory.
_TOUCH_VALUE_HELP = TOUCH_VALUE.decode().replace("%", "%%")

# Command-line interface of the script.
parser = argparse.ArgumentParser(
    description=("Script to check LDAP syncrepl replication state between two servers."),
    epilog=(
        "Author: Benjamin Renard <brenard@easter-eggs.com>, "
        f"Version: {VERSION}, "
        "Source: https://gitea.zionetrix.net/bn8/check_syncrepl_extended"
    ),
)

# --- Servers and connection -------------------------------------------------
parser.add_argument(
    "-p",
    "--provider",
    dest="provider",
    action="store",
    type=str,
    help="LDAP provider URI (example: ldaps://ldapmaster.foo:636)",
)
parser.add_argument(
    "-c",
    "--consumer",
    dest="consumer",
    action="store",
    type=str,
    help="LDAP consumer URI (example: ldaps://ldapslave.foo:636)",
)
# The default stays False (not None): the rest of the script tests
# "serverid is False" to tell "not provided" apart from the valid ID 0.
parser.add_argument(
    "-i",
    "--serverID",
    dest="serverid",
    action="store",
    type=int,
    help=(
        "Compare contextCSN of a specific master. Useful in MultiMaster "
        "setups where each master has a unique ID and a contextCSN for "
        "each replicated master exists. A valid serverID is a integer "
        "value from 0 to 4095 (limited to 3 hex digits, example: '12' "
        "compares the contextCSN matching '#00C#')"
    ),
    default=False,
)
parser.add_argument(
    "-T",
    "--starttls",
    dest="starttls",
    action="store_true",
    help="Start TLS on LDAP provider/consumers connections",
    default=False,
)
parser.add_argument(
    "-D",
    "--dn",
    dest="dn",
    action="store",
    type=str,
    help="LDAP bind DN (example: uid=nagios,ou=sysaccounts,o=example)",
)
parser.add_argument(
    "-P", "--pwd", dest="pwd", action="store", type=str, help="LDAP bind password", default=None
)

# --- Search scope -------------------------------------------------------------
parser.add_argument(
    "-b",
    "--basedn",
    dest="basedn",
    action="store",
    type=str,
    help="LDAP base DN (example: o=example)",
)
parser.add_argument(
    "-f",
    "--filter",
    dest="filterstr",
    action="store",
    type=str,
    help="LDAP filter (default: (objectClass=*))",
    default="(objectClass=*)",
)

# --- Output -------------------------------------------------------------------
parser.add_argument(
    "-d", "--debug", dest="debug", action="store_true", help="Debug mode", default=False
)
parser.add_argument(
    "-n",
    "--nagios",
    dest="nagios",
    action="store_true",
    help="Nagios check plugin mode",
    default=False,
)
parser.add_argument(
    "-q", "--quiet", dest="quiet", action="store_true", help="Quiet mode", default=False
)

# --- Checks -------------------------------------------------------------------
parser.add_argument(
    "--no-check-certificate",
    dest="nocheckcert",
    action="store_true",
    help="Don't check the server certificate (Default: False)",
    default=False,
)
parser.add_argument(
    "--no-check-contextCSN",
    dest="nocheckcontextcsn",
    action="store_true",
    help="Don't check servers contextCSN (Default: False)",
    default=False,
)
parser.add_argument(
    "--only-check-contextCSN",
    dest="onlycheckcontextcsn",
    action="store_true",
    help=("Only check servers root contextCSN (objects check disabled, default : False)"),
    default=False,
)
parser.add_argument(
    "-a",
    "--attributes",
    dest="attrs",
    action="store_true",
    help="Check attributes values (Default: check only entryCSN)",
    default=False,
)
parser.add_argument(
    "--exclude-attributes",
    dest="excl_attrs",
    action="store",
    type=str,
    help="Don't check this attribute (only in attribute check mode)",
    default=None,
)

# --- Touch mode ---------------------------------------------------------------
parser.add_argument(
    "--touch",
    dest="touch",
    action="store",
    type=str,
    help=(
        "Touch attribute giving in parameter to force resync a this LDAP "
        f'object from provider. A value "{_TOUCH_VALUE_HELP}" will be '
        "add to this attribute and remove after. The user use to connect "
        "to the LDAP directory must have write permission on this "
        "attribute on each object."
    ),
    default=None,
)
parser.add_argument(
    "--replace-touch",
    dest="replacetouch",
    action="store_true",
    help="In touch mode, replace value instead of adding.",
    default=False,
)
parser.add_argument(
    "--remove-touch-value",
    dest="removetouchvalue",
    action="store_true",
    help="In touch mode, remove touch value if present.",
    default=False,
)

# --- Paging -------------------------------------------------------------------
parser.add_argument(
    "--page-size",
    dest="page_size",
    action="store",
    type=int,
    help=("Page size: if defined, paging control using LDAP v3 extended control will be enabled."),
    default=None,
)
options = parser.parse_args()

# Logging is configured before anything else may log: calling logging.info()
# on an unconfigured root logger installs a default handler, which would turn
# every later logging.basicConfig() call into a no-op.
FORMAT = "%(asctime)s - %(levelname)s: %(message)s"
if options.debug:
    logging.basicConfig(level=logging.DEBUG, format=FORMAT)
    ldap.set_option(ldap.OPT_DEBUG_LEVEL, 0)  # pylint: disable=no-member
elif options.nagios:
    logging.basicConfig(level=logging.ERROR, format=FORMAT)
elif options.quiet:
    logging.basicConfig(level=logging.WARNING, format=FORMAT)
else:
    logging.basicConfig(level=logging.INFO, format=FORMAT)


def _usage_error(message):
    """Print the usage line and *message* on stderr, then exit.

    parser.error() cannot be used here: it always exits with status 2, which
    Nagios interprets as CRITICAL. A usage problem must exit with 3 (UNKNOWN)
    in Nagios mode and with 1 otherwise.
    """
    parser.print_usage(sys.stderr)
    parser.exit(3 if options.nagios else 1, f"{parser.prog}: error: {message}\n")


if options.nocheckcontextcsn and options.onlycheckcontextcsn:
    _usage_error(
        "You can't use both --no-check-contextCSN and "
        "--only-check-contextCSN parameters at the same time"
    )

if not options.provider or not options.consumer:
    _usage_error("You must provide provider and consumer URI")

if not options.basedn:
    _usage_error("You must provide base DN of connection to LDAP servers")

# serverid is False when the option was not given (see the parser default).
if options.serverid is not False and not 0 <= options.serverid <= 4095:
    _usage_error(
        "ServerID should be a integer value from 0 to 4095 (limited to 3 hexadecimal digits)."
    )

# Touching an object needs its attribute values, so attribute mode is implied.
if options.touch and not options.attrs:
    logging.info("Force option attrs on touch mode")
    options.attrs = True

# Ask for the password interactively when a bind DN is given without one.
if options.dn and options.pwd is None:
    options.pwd = getpass.getpass()

# Comma-separated list of attribute names ignored in attribute check mode.
excl_attrs = []
if options.excl_attrs:
    excl_attrs = [ex.strip() for ex in options.excl_attrs.split(",")]
class LdapServer:
    """Connection to one LDAP server plus the few operations this script needs.

    The connection is opened by connect(); search and update methods assume
    connect() succeeded beforehand.
    """

    uri = None
    dn = None
    pwd = None
    start_tls = False
    # 0 means "not connected yet"; replaced by the LDAP connection object.
    con = 0

    def __init__(self, uri, dn, pwd, start_tls=False, page_size=None):
        """Store connection parameters; no network access happens here.

        uri -- LDAP URI of the server
        dn / pwd -- bind credentials (an empty dn skips the bind)
        start_tls -- issue StartTLS right after connecting
        page_size -- when set, searches use the paged results control
        """
        self.uri = uri
        self.dn = dn
        self.pwd = pwd
        self.start_tls = start_tls
        self.page_size = page_size

    def connect(self):
        """Open (once) and bind the connection.

        Returns True on success or when already connected, False when an
        LDAPError occurred (the error is logged).
        """
        if self.con == 0:
            try:
                con = ldap.initialize(self.uri)
                # pylint: disable=no-member
                con.protocol_version = ldap.VERSION3
                if self.start_tls:
                    con.start_tls_s()
                if self.dn:
                    con.simple_bind_s(self.dn, self.pwd)
                # Only keep the connection once fully set up.
                self.con = con
            except LDAPError:
                logging.error("LDAP Error", exc_info=True)
                return False
        return True

    def getContextCSN(self, basedn=False, serverid=False):
        """Return a contextCSN value of *basedn*, or False when none is found.

        basedn -- entry to read; falls back to the bind DN when falsy
        serverid -- when not False, return the contextCSN whose "#XXX#" part
            matches this ID written as 3 hexadecimal digits; otherwise the
            first contextCSN value is returned
        """
        if not basedn:
            basedn = self.dn
        data = self.search(basedn, "(objectclass=*)", attrs=["contextCSN"], scope="base")
        if not data:
            return False

        # The entry may exist without a (readable) contextCSN attribute.
        contextCSNs = data[0][0][1].get("contextCSN")
        if not contextCSNs:
            logging.error("No contextCSN attribute found on %s", basedn)
            return False
        logging.debug("Found contextCSNs %s", contextCSNs)

        if serverid is False:
            return contextCSNs[0]

        csnid = format(serverid, "X").zfill(3)
        sub = str.encode(f"#{csnid}#", encoding="ascii", errors="replace")
        # Hexadecimal digits are compared case-insensitively so that a server
        # rendering the ID in lowercase (e.g. "#00c#") still matches.
        needle = sub.lower()
        CSN = [s for s in contextCSNs if needle in s.lower()]
        if not CSN:
            logging.error(
                "No contextCSN matching with ServerID %s (=%s) could be found.",
                serverid,
                sub,
            )
            return False
        return CSN[0]

    @staticmethod
    def get_scope(scope):
        """Translate "base" / "one" / "sub" into the python-ldap scope constant.

        Raises ValueError (an Exception subclass) for any other name.
        """
        if scope == "base":
            return ldap.SCOPE_BASE  # pylint: disable=no-member
        if scope == "one":
            return ldap.SCOPE_ONELEVEL  # pylint: disable=no-member
        if scope == "sub":
            return ldap.SCOPE_SUBTREE  # pylint: disable=no-member
        raise ValueError(f'Unknown LDAP scope "{scope}"')

    def search(self, basedn, filterstr, attrs=None, scope=None):
        """Search and return a list of one-element lists [(dn, attributes)].

        Delegates to paged_search() when a page size was configured.
        scope defaults to "sub"; attrs defaults to an empty attribute list.
        """
        if self.page_size:
            return self.paged_search(basedn, filterstr, attrs=attrs, scope=scope)
        res_id = self.con.search(
            basedn, self.get_scope(scope if scope else "sub"), filterstr, attrs if attrs else []
        )
        ret = []
        while True:
            # all=0: fetch results one message at a time.
            res_type, res_data = self.con.result(res_id, 0)
            if res_data == []:
                break
            # Keep entries only (skips anything that is not a search entry).
            if res_type == ldap.RES_SEARCH_ENTRY:  # pylint: disable=no-member
                ret.append(res_data)
        return ret

    def paged_search(self, basedn, filterstr, attrs=None, scope=None):
        """Same contract as search(), using the simple paged results control."""
        ret = []
        page = 0
        pg_ctrl = SimplePagedResultsControl(True, self.page_size, "")
        while page == 0 or pg_ctrl.cookie:
            page += 1
            logging.debug("Page search: loading page %d", page)
            res_id = self.con.search_ext(
                basedn,
                self.get_scope(scope if scope else "sub"),
                filterstr,
                attrs if attrs else [],
                serverctrls=[pg_ctrl],
            )
            # pylint: disable=unused-variable
            res_type, res_data, res_id, serverctrls = self.con.result3(res_id)

            # Reset the cookie unless the server hands back a new one, so a
            # response without the paging control ends the loop instead of
            # requesting the same page forever.
            next_cookie = ""
            for serverctrl in serverctrls:
                if serverctrl.controlType == SimplePagedResultsControl.controlType:
                    next_cookie = serverctrl.cookie
                    break
            pg_ctrl.cookie = next_cookie

            for item in res_data:
                # NOTE(review): python-ldap reports search references as
                # (None, [urls]); they are skipped to match search(), which
                # keeps entries only -- confirm against the python-ldap docs.
                if item[0] is None:
                    continue
                ret.append([item])
        return ret

    def update_object(self, dn, old, new):
        """Apply the difference between *old* and *new* attribute dicts to *dn*.

        Returns True when nothing had to change or the modification succeeded,
        False when the server rejected it (the error is logged).
        """
        ldif = modlist.modifyModlist(old, new)
        if not ldif:
            return True
        try:
            logging.debug("Update object %s: %s", dn, ldif)
            self.con.modify_s(dn, ldif)
            return True
        except LDAPError:
            logging.error("Error updating object %s", dn, exc_info=True)
        return False

    @staticmethod
    def get_attr(obj, attr):
        """Return the values of *attr* in a search result item, or []."""
        if attr in obj[0][1]:
            return obj[0][1][attr]
        return []

    def touch_object(self, dn, attr, orig_value):
        """Modify then restore *attr* of *dn* to force its resynchronisation.

        orig_value -- current values of the attribute (may be empty); it is
            never modified by this method
        Reads the replacetouch / removetouchvalue command-line options.
        Returns True when the touch modification succeeded, False otherwise.
        """
        # Work on copies: the caller's list belongs to the loaded directory data.
        current = list(orig_value) if orig_value else []
        old = {attr: list(current)} if current else {}

        new = {}
        if options.replacetouch:
            # Replace everything by the marker; when the marker is already
            # there the attribute is left out of "new" (i.e. removed).
            if TOUCH_VALUE not in current:
                new[attr] = [TOUCH_VALUE]
        elif TOUCH_VALUE in current:
            # Marker left over from a previous run: touching means removing it.
            new[attr] = list(current)
            new[attr].remove(TOUCH_VALUE)
        else:
            new[attr] = current + [TOUCH_VALUE]

        try:
            logging.info('Touch object "%s" on attribute "%s": %s => %s', dn, attr, old, new)
            if self.update_object(dn, old, new):
                logging.info('Restore original value of attribute "%s" of object "%s"', attr, dn)
                # old may lack the attribute entirely when it had no value.
                if options.removetouchvalue and TOUCH_VALUE in old.get(attr, []):
                    old[attr].remove(TOUCH_VALUE)
                self.update_object(dn=dn, old=new, new=old)
                return True
        except LDAPError:
            logging.error('Error touching object "%s"', dn, exc_info=True)
        return False
# Certificate verification is a library-wide option, set before connecting.
if options.nocheckcert:
    # pylint: disable=no-member
    ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_NEVER)

servers = [options.provider, options.consumer]

# All three dicts are keyed by server URI.
LdapServers = {}  # URI -> LdapServer connection
LdapObjects = {}  # URI -> {DN: attributes dict (-a mode) or entryCSN value}
LdapServersCSN = {}  # URI -> contextCSN value, or False when not found

for srv in servers:
    logging.info("Connect to %s", srv)
    LdapServers[srv] = LdapServer(
        srv, options.dn, options.pwd, options.starttls, page_size=options.page_size
    )

    if not LdapServers[srv].connect():
        if options.nagios:
            print(f"UNKNOWN - Failed to connect to {srv}")
            sys.exit(3)
        sys.exit(1)

    try:
        if not options.nocheckcontextcsn:
            LdapServersCSN[srv] = LdapServers[srv].getContextCSN(options.basedn, options.serverid)
            logging.info("ContextCSN of %s: %s", srv, LdapServersCSN[srv])

        if not options.onlycheckcontextcsn:
            logging.info("List objects from %s", srv)
            LdapObjects[srv] = {}
            if options.attrs:
                # Attribute mode: keep the full attributes dict of each entry.
                for obj in LdapServers[srv].search(options.basedn, options.filterstr, []):
                    logging.debug("Found on %s: %s", srv, obj[0][0])
                    LdapObjects[srv][obj[0][0]] = obj[0][1]
            else:
                # Default mode: only the first entryCSN value is compared.
                for obj in LdapServers[srv].search(
                    options.basedn, options.filterstr, ["entryCSN"]
                ):
                    logging.debug(
                        "Found on %s: %s / %s", srv, obj[0][0], obj[0][1]["entryCSN"][0]
                    )
                    LdapObjects[srv][obj[0][0]] = obj[0][1]["entryCSN"][0]
            logging.info("%s objects founds", len(LdapObjects[srv]))
    except LDAPError:
        # A failed search must not surface as a raw traceback: in Nagios mode
        # the plugin has to answer UNKNOWN (exit status 3).
        logging.error("LDAP error while retrieving data from %s", srv, exc_info=True)
        if options.nagios:
            print(f"UNKNOWN - Failed to retrieve data from {srv}")
            sys.exit(3)
        sys.exit(1)
if not options.onlycheckcontextcsn:
    # Per-server (URI) lists of problems, filled below and reported afterwards.
    not_found = {}
    not_sync = {}
    for srv in servers:
        not_found[srv] = []
        not_sync[srv] = []

    if options.attrs:
        logging.info("Check if objects a are synchronized (by comparing attributes's values)")
    else:
        logging.info("Check if objects are synchronized (by comparing entryCSN)")

    provider_objects = LdapObjects[options.provider]

    # Pass 1: every provider object must exist, identical, on the other server(s).
    for obj, provider_entry in provider_objects.items():
        logging.debug("Check obj %s", obj)
        for srv_name, srv_objects in LdapObjects.items():
            if srv_name == options.provider:
                continue

            if obj not in srv_objects:
                logging.debug("Obj %s: not found on %s", obj, srv_name)
                not_found[srv_name].append(obj)
                # A missing object is always a candidate for touching.
                touch = True
            else:
                touch = False
                consumer_entry = srv_objects[obj]
                if provider_entry != consumer_entry:
                    if options.attrs:
                        # Names of the attributes that really differ.
                        attrs_list = []
                        for attr, provider_values in provider_entry.items():
                            if attr in excl_attrs:
                                continue
                            if attr not in consumer_entry:
                                attrs_list.append(attr)
                                logging.debug(
                                    "Obj %s not synchronized: %s not present on %s",
                                    obj,
                                    attr,
                                    srv_name,
                                )
                            # Value order is not significant: compare sorted
                            # copies and leave the loaded data untouched.
                            elif sorted(consumer_entry[attr]) != sorted(provider_values):
                                attrs_list.append(attr)
                                logging.debug(
                                    "Obj %s not synchronized: %s not same value(s)",
                                    obj,
                                    attr,
                                )
                        # An attribute left on the consumer only (e.g. removed
                        # on the provider) is a desynchronisation too.
                        for attr in consumer_entry:
                            if attr in excl_attrs or attr in provider_entry:
                                continue
                            attrs_list.append(attr)
                            logging.debug(
                                "Obj %s not synchronized: %s only present on %s",
                                obj,
                                attr,
                                srv_name,
                            )
                        if attrs_list:
                            touch = True
                            not_sync[srv_name].append(f'{obj} ({",".join(attrs_list)})')
                    else:
                        logging.debug(
                            "Obj %s not synchronized: %s <-> %s",
                            obj,
                            provider_entry,
                            consumer_entry,
                        )
                        not_sync[srv_name].append(obj)

            if touch and options.touch:
                orig_value = []
                if options.touch in provider_entry:
                    orig_value = provider_entry[options.touch]
                LdapServers[options.provider].touch_object(obj, options.touch, orig_value)

    # Pass 2: objects that only exist on the consumer.
    for obj in LdapObjects[options.consumer]:
        logging.debug("Check obj %s of consumer", obj)
        if obj not in provider_objects:
            logging.debug("Obj %s: not found on provider", obj)
            not_found[options.provider].append(obj)
if options.nagios:
    # Nagios plugin output: one status line, optional long output, and the
    # exit status (0 = OK, 2 = CRITICAL).
    errors = []
    long_output = []

    if not options.nocheckcontextcsn:
        if not LdapServersCSN[options.provider]:
            errors.append("ContextCSN of LDAP server provider could not be found")
        else:
            long_output.append(
                f"ContextCSN on LDAP server provider = {LdapServersCSN[options.provider]}"
            )
            for srv_name, srv_csn in LdapServersCSN.items():
                if srv_name == options.provider:
                    continue
                if not srv_csn:
                    errors.append(f"ContextCSN of {srv_name} not found")
                elif srv_csn != LdapServersCSN[options.provider]:
                    errors.append(f"ContextCSN of {srv_name} not the same of provider")
                    long_output.append(f"ContextCSN on LDAP server {srv_name} = {srv_csn}")

    if not options.onlycheckcontextcsn:
        if not_found[options.consumer]:
            errors.append(f"{len(not_found[options.consumer])} not found object(s) on consumer")
            long_output.append(f"Object(s) not found on server {options.consumer} (consumer):")
            for obj in not_found[options.consumer]:
                long_output.append(f" - {obj}")
        if not_found[options.provider]:
            errors.append(f"{len(not_found[options.provider])} not found object(s) on provider")
            long_output.append(f"Object(s) not found on server {options.provider} (provider):")
            for obj in not_found[options.provider]:
                long_output.append(f" - {obj}")
        if not_sync[options.consumer]:
            errors.append(
                f"{len(not_sync[options.consumer])} not synchronized object(s) on consumer"
            )
            long_output.append(
                f"Object(s) not synchronized on server {options.consumer} (consumer):"
            )
            for obj in not_sync[options.consumer]:
                long_output.append(f" - {obj}")

    if errors:
        print(f'CRITICAL: {", ".join(errors)}')
        print("\n\n")
        print("\n".join(long_output))
        sys.exit(2)
    print("OK: consumer and provider are synchronized")
    sys.exit(0)
else:
    # Plain mode: problems are reported as warnings through logging.
    noerror = True

    # The contextCSN comparison does not depend on the server being listed,
    # so it runs once (inside the per-server loop every warning was doubled).
    if not options.nocheckcontextcsn:
        if not LdapServersCSN[options.provider]:
            logging.warning("ContextCSN of LDAP server provider could not be found")
            noerror = False
        else:
            for srv_name, srv_csn in LdapServersCSN.items():
                if srv_name == options.provider:
                    continue
                if not srv_csn:
                    logging.warning("ContextCSN of %s not found", srv_name)
                    noerror = False
                elif srv_csn != LdapServersCSN[options.provider]:
                    logging.warning("ContextCSN of %s not the same of provider", srv_name)
                    noerror = False

    if not options.onlycheckcontextcsn:
        for srv in servers:
            if not_found[srv]:
                logging.warning(
                    "Not found objects on %s :\n - %s", srv, "\n - ".join(not_found[srv])
                )
                noerror = False
            if not_sync[srv]:
                # Same list layout as above: every item on its own " - " line.
                logging.warning(
                    "Not sync objects on %s :\n - %s", srv, "\n - ".join(not_sync[srv])
                )
                noerror = False

    if noerror:
        logging.info("No sync problem detected")