Do not touch timestamp if cronjob failed!
[pyutils.git] / examples / cron / cron.py
1 #!/usr/bin/env python3
2
3 """Wrapper that adds exclusive locks, timeouts, timestamp accounting,
4 max frequency, logging, etc... to running cron jobs.
5 """
6
7 import datetime
8 import logging
9 import os
10 import sys
11 from typing import Optional
12
13 from pyutils import bootstrap, config, exec_utils, stopwatch
14 from pyutils.datetimes import datetime_utils
15 from pyutils.files import file_utils, lockfile
16
17 logger = logging.getLogger(__name__)
18
19 cfg = config.add_commandline_args(
20     f"Python Cron Runner ({__file__})",
21     "Wrapper for cron commands with locking, timeouts, and accounting.",
22 )
23 cfg.add_argument(
24     "--lockfile",
25     default=None,
26     metavar="LOCKFILE_PATH",
27     help="Path to the lockfile to use to ensure that two instances of a command do not execute contemporaneously.",
28 )
29 cfg.add_argument(
30     "--lockfile_audit_record",
31     default=None,
32     metavar="LOCKFILE_AUDIT_RECORD_FILENAME",
33     help="Path to a record of when the logfile was held/released and for what reason",
34 )
35 cfg.add_argument(
36     "--timeout",
37     type=str,
38     metavar="TIMEOUT",
39     default=None,
40     help='Maximum time for lock acquisition + command execution.  Undecorated for seconds but "3m" or "1h 15m" work too.',
41 )
42 cfg.add_argument(
43     "--timestamp",
44     type=str,
45     metavar="TIMESTAMP_FILE",
46     default=None,
47     help="The /timestamp/TIMESTAMP_FILE file tracking the work being done; files' mtimes will be set to the last successful run of a command for accounting purposes.",
48 )
49 cfg.add_argument(
50     "--max_frequency",
51     type=str,
52     metavar="FREQUENCY",
53     default=None,
54     help='The maximum frequency with which to do this work; even if the wrapper is invoked more often than this it will not run the command.  Requires --timestamp.  Undecorated for seconds but "3h" or "1h 15m" work too.',
55 )
56 cfg.add_argument(
57     "--command",
58     nargs="*",
59     required=True,
60     type=str,
61     metavar="COMMANDLINE",
62     help="The commandline to run under a lock.",
63 )
64 config.overwrite_argparse_epilog(
65     """
66 cron.py's exit value:
67
68    -1000 = some internal error occurred (see exception log).
69        0 = we exited early due to not enough time passage since the last
70            invocation of --command.
71     1000 = we could not obtain the lockfile; someone else owns it.
72  else = if the --command was run successfully, cron.py will exit with
73         the same code that the subcommand exited with.
74 """
75 )
76
77
78 def run_command(timeout: Optional[int], timestamp_file: Optional[str]) -> int:
79     """Run cron command"""
80     cmd = " ".join(config.config["command"])
81     logger.info('cron cmd = "%s"', cmd)
82     logger.debug("shell environment:")
83     for var in os.environ:
84         val = os.environ[var]
85         logger.debug("%s = %s", var, val)
86     logger.debug("____ (↓↓↓ output from the subprocess appears below here ↓↓↓) ____")
87     try:
88         with stopwatch.Timer() as t:
89             ret = exec_utils.cmd_exitcode(cmd, timeout)
90         logger.debug(
91             f"____ (↑↑↑ subprocess finished in {t():.2f}s, exit value was {ret} ↑↑↑) ____"
92         )
93         if ret == 0 and timestamp_file is not None and os.path.exists(timestamp_file):
94             logger.debug("Touching %s", timestamp_file)
95             file_utils.touch_file(timestamp_file)
96         return ret
97     except Exception as e:
98         logger.exception(e)
99         print("Cron subprocess failed, giving up.", file=sys.stderr)
100         logger.warning("Cron subprocess failed, giving up")
101         return -1000
102
103
104 @bootstrap.initialize
105 def main() -> int:
106     """Entry point"""
107     if config.config["timestamp"]:
108         timestamp_file = f"/timestamps/{config.config['timestamp']}"
109         if not file_utils.does_file_exist(timestamp_file):
110             logger.error(
111                 "--timestamp argument's target file (%s) must already exist.",
112                 timestamp_file,
113             )
114             sys.exit(-1)
115     else:
116         timestamp_file = None
117         if config.config["max_frequency"]:
118             config.error(
119                 "The --max_frequency argument requires the --timestamp argument."
120             )
121
122     now = datetime.datetime.now()
123     if timestamp_file is not None and os.path.exists(timestamp_file):
124         max_frequency = config.config["max_frequency"]
125         if max_frequency is not None:
126             max_delta = datetime_utils.parse_duration(max_frequency)
127             if max_delta > 0:
128                 mtime = file_utils.get_file_mtime_as_datetime(timestamp_file)
129                 delta = now - mtime
130                 if delta.total_seconds() < max_delta:
131                     logger.info(
132                         "It's only been %s since we last ran successfully; bailing out.",
133                         datetime_utils.describe_duration_briefly(delta.total_seconds()),
134                     )
135                     sys.exit(0)
136
137     timeout = config.config["timeout"]
138     if timeout is not None:
139         timeout = datetime_utils.parse_duration(timeout)
140         assert timeout > 0
141         logger.debug("Timeout is %ss", timeout)
142         lockfile_expiration = datetime.datetime.now().timestamp() + timeout
143     else:
144         logger.debug("Timeout not specified; no lockfile expiration.")
145         lockfile_expiration = None
146
147     lockfile_path = config.config["lockfile"]
148     if lockfile_path is not None:
149         logger.debug("Attempting to acquire lockfile %s...", lockfile_path)
150         try:
151             with lockfile.LockFile(
152                 lockfile_path,
153                 do_signal_cleanup=True,
154                 override_command=" ".join(config.config["command"]),
155                 expiration_timestamp=lockfile_expiration,
156             ) as lf:
157                 record = config.config["lockfile_audit_record"]
158                 cmd = " ".join(config.config["command"])
159                 if record:
160                     start = lf.locktime
161                     with open(record, "a") as wf:
162                         print(f"{lockfile_path}, ACQUIRE, {start}, {cmd}", file=wf)
163                 retval = run_command(timeout, timestamp_file)
164                 if record:
165                     end = datetime.datetime.now().timestamp()
166                     duration = datetime_utils.describe_duration_briefly(end - start)
167                     with open(record, "a") as wf:
168                         print(
169                             f"{lockfile_path}, RELEASE({duration}), {end}, {cmd}",
170                             file=wf,
171                         )
172                 return retval
173         except lockfile.LockFileException as e:
174             logger.exception(e)
175             msg = f"Failed to acquire {lockfile_path}, giving up."
176             logger.error(msg)
177             print(msg, file=sys.stderr)
178             return 1000
179     else:
180         logger.debug("No lockfile indicated; not locking anything.")
181         return run_command(timeout, timestamp_file)
182
183
184 if __name__ == "__main__":
185     # Insist that our logger.whatever('messages') make their way into
186     # syslog with a facility=LOG_CRON, please.  Yes, this is hacky.
187     sys.argv.append("--logging_syslog")
188     sys.argv.append("--logging_syslog_facility=CRON")
189     main()