Initial Commit
This commit is contained in:
commit
b92cf90a0b
3 changed files with 157 additions and 0 deletions
10
README
Normal file
10
README
Normal file
|
@ -0,0 +1,10 @@
|
|||
A simple monitoring script for online services. Includes self-checks to be more resistant
|
||||
to unstable networks as it is meant to be run on a home device (nothing keeps you from
|
||||
running it on a server, though).
|
||||
|
||||
Currently it publishes all downtime to a repository but the fail function can be easily
|
||||
modified to suit one's needs (nothing depends on it). Information about all services is
|
||||
in the monitors dictionary.
|
||||
|
||||
The requirements are modular, so adding new ways of checking for status is as easy as
|
||||
writing a function that returns a bool indicating whether the check passed.
|
143
monitor.py
Normal file
143
monitor.py
Normal file
|
@ -0,0 +1,143 @@
|
|||
#!/bin/python3
|
||||
# HTTP, DNS, and IP monitoring script
|
||||
import time
|
||||
import logging
|
||||
import datetime
|
||||
import socket
|
||||
|
||||
import requests
|
||||
import pydig
|
||||
import git
|
||||
import pytz
|
||||
|
||||
# Module-level logger; a stream handler is attached and everything from
# DEBUG upwards is emitted.
logger = logging.getLogger(__name__)
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.DEBUG)

# Path of the local clone of the status repository that fail() writes to.
REPO_ROOT = "status-repo"

# last states of services to keep from detecting downtime repeatedly
# (service name -> True when the most recent check passed).
# NOTE(review): this lives in memory only, so it starts empty on every
# run of the script -- confirm that is intended for the cron use case.
last_states = {}
|
||||
|
||||
|
||||
# publish a failed service, no dependents so edit at will
|
||||
def fail(service_name: str, failed_requirements: list):
    """Publish a downtime report for a service to the status repository.

    Does nothing when ``last_states`` already records the service as down,
    so an ongoing outage is not reported again. Otherwise the repository at
    ``REPO_ROOT`` is pulled, a markdown post describing the outage is
    written, committed and pushed to ``origin``.

    Args:
        service_name: Name of the failed service; used in the post title,
            the ``affected`` list and the commit message.
        failed_requirements: The requirement functions that did not pass;
            their ``__name__`` is listed in the post body and in the log.
    """
    if not last_states.get(service_name, True):
        return

    requirement_names = [r.__name__ for r in failed_requirements]
    now = datetime.datetime.now(tz=pytz.timezone("Europe/Prague"))
    filename = f"src/content/{now.strftime('%Y-%m-%d-%f')}-downtime.md"
    repo = git.Repo(REPO_ROOT)
    origin = repo.remote('origin')
    try:
        origin.pull(kill_after_timeout=10)
    except git.exc.CommandError:
        logger.warning("Failed to pull from origin! Aborting!")
        return

    lines = [
        "---\n",
        f"title: {service_name} downtime\n",
        f"date: {now.strftime('%Y-%m-%d %H:%M:%S %z')}\n",
        "severity: down\n",
        "affected:\n",
        f"  - {service_name}\n",
        "---\n",
        f"Automatic checks for {service_name} have failed. "
        f"Requirements {requirement_names} failed.\n",
    ]
    # The file is only written, never read back, so plain 'w' is enough;
    # the encoding is pinned so the post does not depend on the locale.
    with open(REPO_ROOT + "/" + filename, 'w', encoding="utf-8") as f:
        f.writelines(lines)

    repo.git.add(filename)
    repo.git.commit('-m', f'{service_name} downtime')
    try:
        origin.push(kill_after_timeout=10)
    except git.exc.CommandError:
        logger.warning("Push to origin failed! Aborting and resetting!")
        # Drop the local commit and the written file so the clone matches
        # the remote again. `git reset` has no `--working-tree` option, so
        # the hard reset is requested explicitly.
        repo.git.reset("--hard", "origin/HEAD")

    logger.warning(f"service {service_name} failed {requirement_names}")
|
||||
|
||||
|
||||
def self_check(timeout: float = 10.0) -> bool:
    """Check that this machine itself can reach the internet.

    Requests ``https://google.com/`` and treats anything other than a
    200 response as a failed self-check.

    Args:
        timeout: Seconds to wait for the request before giving up, so an
            unstable connection cannot hang the whole run.

    Returns:
        True when the request succeeded with status 200, False when it
        returned another status or failed at the network level.
    """
    try:
        response = requests.get("https://google.com/", timeout=timeout)
    # requests raises its own exception hierarchy; the builtin
    # ConnectionError does not cover it.
    except requests.exceptions.RequestException:
        return False
    return response.status_code == 200
|
||||
|
||||
|
||||
def http_requirement(url: str, code: int, attempts: int = 2,
                     timeout: float = 10.0) -> bool:
    """Check that a GET request to ``url`` answers with status ``code``.

    The request is retried so a single dropped connection does not count
    as downtime.

    Args:
        url: Address to request.
        code: HTTP status code the service is expected to answer with.
        attempts: Maximum number of requests to make before giving up.
        timeout: Seconds to wait for each request.

    Returns:
        True as soon as one attempt returns the expected status code,
        False when every attempt failed or returned another status.
    """
    for _ in range(attempts):
        try:
            resp = requests.get(url, timeout=timeout)
        # requests raises its own exception hierarchy; the builtin
        # ConnectionError does not cover it.
        except requests.exceptions.RequestException:
            continue
        if resp.status_code == code:
            return True
    return False
|
||||
|
||||
|
||||
def dns_requirement(name: str, ip: str) -> bool:
    """Check that ``name`` has an A record, optionally a specific one.

    Args:
        name: Domain name to query.
        ip: Address expected among the A records, or ``"*"`` to accept
            any non-empty answer.

    Returns:
        True when the query returned at least one record and ``ip`` is
        ``"*"`` or is among the records; False otherwise.
    """
    try:
        query = pydig.query(name, "A")
    # NOTE(review): which exceptions pydig raises on failure is not visible
    # here -- confirm ConnectionError is the right one to catch.
    except ConnectionError:
        return False
    # bool() keeps the declared return type: a bare `query and ...` would
    # hand back the empty list itself when there are no records.
    return bool(query) and (ip == "*" or ip in query)
|
||||
|
||||
|
||||
def ip_requirement(ip: str, port: int, prot: str, timeout: float = 10.0) -> bool:
    """Check that a socket can be connected to ``ip``:``port``.

    Args:
        ip: Host name or address to connect to.
        port: Port number to connect to.
        prot: ``"tcp"`` for a stream socket; any other value selects a
            datagram (UDP) socket.
        timeout: Seconds to wait for the connection before giving up.

    Returns:
        True when ``connect`` succeeded, False on any socket-level error
        (refused connection, failed name resolution, timeout, ...).
    """
    protocol = socket.SOCK_STREAM if prot == "tcp" else socket.SOCK_DGRAM
    # The context manager closes the socket on the failure path as well.
    with socket.socket(type=protocol) as sock:
        sock.settimeout(timeout)
        try:
            # NOTE(review): connecting a datagram socket sends no packet,
            # so for UDP this presumably only proves the name resolves --
            # confirm whether a real probe is needed.
            sock.connect((ip, port))
        # OSError also covers socket.gaierror and timeouts, which the
        # narrower ConnectionError lets through.
        except OSError:
            return False
    return True
|
||||
|
||||
|
||||
def check(monitors: dict):
    """Run every requirement of every monitored service once.

    ``monitors`` maps a service name to a dict of requirement functions
    and the keyword arguments each is called with. A requirement that
    does not pass is only counted after ``self_check()`` confirms our own
    connection works; if the self-check fails the whole run is aborted.
    Services with failed requirements are handed to ``fail()`` and the
    outcome of each service is stored in ``last_states``.
    """
    for service, requirements in monitors.items():
        logger.debug(f"Checking service {service}")
        broken = []
        for requirement, kwargs in requirements.items():
            logger.debug(f" checking requirement {requirement.__name__}")
            if not requirement(**kwargs):
                # Make sure the problem is not on our side before blaming
                # the service.
                if not self_check():
                    logger.warning("Self-check failed, assuming bad connection and aborting")
                    return
                logger.info(f"{service} failed requirement {requirement.__name__}")
                broken.append(requirement)
            # brief pause between consecutive requirement checks
            time.sleep(1)
        if broken:
            fail(service, broken)
        last_states[service] = not broken
    logger.debug("check complete")
|
||||
|
||||
|
||||
# Services to monitor: maps a service name to a dict of
# {requirement function: keyword arguments it is called with}.
# check() calls each requirement as requirement(**kwargs).
monitors = {
    "f.bain.cz": {
        http_requirement: {"url": "https://f.bain.cz/status", "code": 200},
        # dns_requirement: {"name": "f.bain.cz", "ip": "*"},
        # ip_requirement: {"ip": "f.bain.cz", "port": 80, "prot": "tcp"}
    },
    "s.bain.cz": {
        http_requirement: {"url": "https://s.bain.cz/", "code": 200},
    },
    "git.bain.cz": {
        http_requirement: {"url": "https://git.bain.cz/", "code": 200},
    },
    "ts3.bain.cz": {
        ip_requirement: {"ip": "ts3.bain.cz", "port": 9987, "prot": "udp"}
    }
}
|
||||
|
||||
if __name__ == '__main__':
    # Run a single pass over all monitors and exit.
    # we assume this is gonna be run in a cron job as the gitpython
    # library is slowly leaking memory apparently
    check(monitors)
|
4
requirements.txt
Normal file
4
requirements.txt
Normal file
|
@ -0,0 +1,4 @@
|
|||
requests
|
||||
pydig
|
||||
gitpython
|
||||
pytz
|
Loading…
Reference in a new issue