diff --git a/autocert/examples/hello-mtls/README.md b/autocert/examples/hello-mtls/README.md index ec7267f2..16843615 100644 --- a/autocert/examples/hello-mtls/README.md +++ b/autocert/examples/hello-mtls/README.md @@ -74,3 +74,11 @@ languages are appreciated! - [X] Restrict to safe ciphersuites and TLS versions - [ ] TLS stack configuration loaded from `step-ca` - [ ] Root certificate rotation + +[envoy/](envoy/) +- [X] Server + - [X] mTLS (client authentication using internal root certificate) + - [X] Automatic certificate renewal + - [X] Restrict to safe ciphersuites and TLS versions + - [ ] TLS stack configuration loaded from `step-ca` + - [ ] Root certificate rotation diff --git a/autocert/examples/hello-mtls/envoy/Dockerfile.server b/autocert/examples/hello-mtls/envoy/Dockerfile.server new file mode 100644 index 00000000..29491cc6 --- /dev/null +++ b/autocert/examples/hello-mtls/envoy/Dockerfile.server @@ -0,0 +1,21 @@ +FROM envoyproxy/envoy-alpine + +RUN apk update +RUN apk add python3 +RUN apk add inotify-tools +RUN mkdir /src + +ADD entrypoint.sh /src +ADD certwatch.sh /src +ADD hot-restarter.py /src +ADD start-envoy.sh /src +ADD server.yaml /src + +# Flask app +ADD server.py /src +ADD requirements.txt /src +RUN pip3 install -r /src/requirements.txt + +# app, certificate watcher and envoy +ENTRYPOINT ["/src/entrypoint.sh"] +CMD ["python3", "/src/hot-restarter.py", "/src/start-envoy.sh"] diff --git a/autocert/examples/hello-mtls/envoy/certwatch.sh b/autocert/examples/hello-mtls/envoy/certwatch.sh new file mode 100755 index 00000000..9a65619d --- /dev/null +++ b/autocert/examples/hello-mtls/envoy/certwatch.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +while true; do + inotifywait -e modify /var/run/autocert.step.sm/site.crt + kill -HUP 1 +done diff --git a/autocert/examples/hello-mtls/envoy/entrypoint.sh b/autocert/examples/hello-mtls/envoy/entrypoint.sh new file mode 100755 index 00000000..e22174a1 --- /dev/null +++ b/autocert/examples/hello-mtls/envoy/entrypoint.sh @@ 
-0,0 +1,10 @@ +#!/bin/sh + +# start hello world app +python3 /src/server.py & + +# watch for the update of the cert and reload envoy +/src/certwatch.sh & + +# Run docker CMD +exec "$@" \ No newline at end of file diff --git a/autocert/examples/hello-mtls/envoy/hello-mtls.server.yaml b/autocert/examples/hello-mtls/envoy/hello-mtls.server.yaml new file mode 100644 index 00000000..dfe5fd7f --- /dev/null +++ b/autocert/examples/hello-mtls/envoy/hello-mtls.server.yaml @@ -0,0 +1,33 @@ +apiVersion: v1 +kind: Service +metadata: + labels: {app: hello-mtls} + name: hello-mtls +spec: + type: ClusterIP + ports: + - port: 443 + targetPort: 443 + selector: {app: hello-mtls} + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: hello-mtls + labels: {app: hello-mtls} +spec: + replicas: 1 + selector: {matchLabels: {app: hello-mtls}} + template: + metadata: + annotations: + autocert.step.sm/name: hello-mtls.default.svc.cluster.local + labels: {app: hello-mtls} + spec: + containers: + - name: hello-mtls + image: hello-mtls-server-envoy:latest + imagePullPolicy: Never + resources: {requests: {cpu: 10m, memory: 20Mi}} diff --git a/autocert/examples/hello-mtls/envoy/hot-restarter.py b/autocert/examples/hello-mtls/envoy/hot-restarter.py new file mode 100644 index 00000000..e0b4a7e0 --- /dev/null +++ b/autocert/examples/hello-mtls/envoy/hot-restarter.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python +from __future__ import print_function + +import os +import signal +import sys +import time + +# The number of seconds to wait for children to gracefully exit after +# propagating SIGTERM before force killing children. 
+# NOTE: If using a shutdown mechanism such as runit's `force-stop` which sends +# a KILL after a specified timeout period, it's important to ensure that this +# constant is smaller than the KILL timeout +TERM_WAIT_SECONDS = 30 + +restart_epoch = 0 +pid_list = [] + + +def term_all_children(): + """ Iterate through all known child processes, send a TERM signal to each of + them, and then wait up to TERM_WAIT_SECONDS for them to exit gracefully, + exiting early if all children go away. If one or more children have not + exited after TERM_WAIT_SECONDS, they will be forcibly killed """ + + # First uninstall the SIGCHLD handler so that we don't get called again. + signal.signal(signal.SIGCHLD, signal.SIG_DFL) + + global pid_list + for pid in pid_list: + print("sending TERM to PID={}".format(pid)) + try: + os.kill(pid, signal.SIGTERM) + except OSError: + print("error sending TERM to PID={} continuing".format(pid)) + + all_exited = False + + # wait for TERM_WAIT_SECONDS seconds for children to exit cleanly + retries = 0 + while not all_exited and retries < TERM_WAIT_SECONDS: + for pid in list(pid_list): + ret_pid, exit_status = os.waitpid(pid, os.WNOHANG) + if ret_pid == 0 and exit_status == 0: + # the child is still running + continue + + pid_list.remove(pid) + + if len(pid_list) == 0: + all_exited = True + else: + retries += 1 + time.sleep(1) + + if all_exited: + print("all children exited cleanly") + else: + for pid in pid_list: + print("child PID={} did not exit cleanly, killing".format(pid)) + force_kill_all_children() + sys.exit(1) # error status because a child did not exit cleanly + + +def force_kill_all_children(): + """ Iterate through all known child processes and force kill them. 
Typically + term_all_children() should be attempted first to give child processes an + opportunity to clean up state before exiting """ + + global pid_list + for pid in pid_list: + print("force killing PID={}".format(pid)) + try: + os.kill(pid, signal.SIGKILL) + except OSError: + print("error force killing PID={} continuing".format(pid)) + + pid_list = [] + + +def shutdown(): + """ Attempt to gracefully shutdown all child Envoy processes and then exit. + See term_all_children() for further discussion. """ + term_all_children() + sys.exit(0) + + +def sigterm_handler(signum, frame): + """ Handler for SIGTERM. """ + print("got SIGTERM") + shutdown() + + +def sigint_handler(signum, frame): + """ Handler for SIGINT (ctrl-c). The same as the SIGTERM handler. """ + print("got SIGINT") + shutdown() + + +def sighup_handler(signum, frame): + """ Handler for SIGHUP. This signal is used to cause the restarter to fork and exec a new + child. """ + + print("got SIGHUP") + fork_and_exec() + + +def sigusr1_handler(signum, frame): + """ Handler for SIGUSR1. Propagate SIGUSR1 to all of the child processes """ + + global pid_list + for pid in pid_list: + print("sending SIGUSR1 to PID={}".format(pid)) + try: + os.kill(pid, signal.SIGUSR1) + except OSError: + print("error in SIGUSR1 to PID={} continuing".format(pid)) + + +def sigchld_handler(signum, frame): + """ Handler for SIGCHLD. Iterates through all of our known child processes and figures out whether + the signal/exit was expected or not. Python doesn't have any of the native signal handlers + ability to get the child process info directly from the signal handler so we need to iterate + through all child processes and see what happened.""" + + print("got SIGCHLD") + + kill_all_and_exit = False + global pid_list + pid_list_copy = list(pid_list) + for pid in pid_list_copy: + ret_pid, exit_status = os.waitpid(pid, os.WNOHANG) + if ret_pid == 0 and exit_status == 0: + # This child is still running. 
+ continue + + pid_list.remove(pid) + + # Now we see how the child exited. + if os.WIFEXITED(exit_status): + exit_code = os.WEXITSTATUS(exit_status) + print("PID={} exited with code={}".format(ret_pid, exit_code)) + if exit_code == 0: + # Normal exit. We assume this was on purpose. + pass + else: + # Something bad happened. We need to tear everything down so that whoever started the + # restarter can know about this situation and restart the whole thing. + kill_all_and_exit = True + elif os.WIFSIGNALED(exit_status): + print("PID={} was killed with signal={}".format(ret_pid, os.WTERMSIG(exit_status))) + kill_all_and_exit = True + else: + kill_all_and_exit = True + + if kill_all_and_exit: + print("Due to abnormal exit, force killing all child processes and exiting") + + # First uninstall the SIGCHLD handler so that we don't get called again. + signal.signal(signal.SIGCHLD, signal.SIG_DFL) + + force_kill_all_children() + + # Our last child died, so we have no purpose. Exit. + if not pid_list: + print("exiting due to lack of child processes") + sys.exit(1 if kill_all_and_exit else 0) + + +def fork_and_exec(): + """ This routine forks and execs a new child process and keeps track of its PID. Before we fork, + set the current restart epoch in an env variable that processes can read if they care. """ + + global restart_epoch + os.environ['RESTART_EPOCH'] = str(restart_epoch) + print("forking and execing new child process at epoch {}".format(restart_epoch)) + restart_epoch += 1 + + child_pid = os.fork() + if child_pid == 0: + # Child process + os.execl(sys.argv[1], sys.argv[1]) + else: + # Parent process + print("forked new child process with PID={}".format(child_pid)) + pid_list.append(child_pid) + + +def main(): + """ Script main. This script is designed so that a process watcher like runit or monit can watch + this process and take corrective action if it ever goes away. 
""" + + print("starting hot-restarter with target: {}".format(sys.argv[1])) + + signal.signal(signal.SIGTERM, sigterm_handler) + signal.signal(signal.SIGINT, sigint_handler) + signal.signal(signal.SIGHUP, sighup_handler) + signal.signal(signal.SIGCHLD, sigchld_handler) + signal.signal(signal.SIGUSR1, sigusr1_handler) + + # Start the first child process and then go into an endless loop since everything else happens via + # signals. + fork_and_exec() + while True: + time.sleep(60) + + +if __name__ == '__main__': + main() diff --git a/autocert/examples/hello-mtls/envoy/requirements.txt b/autocert/examples/hello-mtls/envoy/requirements.txt new file mode 100644 index 00000000..e3e9a71d --- /dev/null +++ b/autocert/examples/hello-mtls/envoy/requirements.txt @@ -0,0 +1 @@ +Flask diff --git a/autocert/examples/hello-mtls/envoy/server.py b/autocert/examples/hello-mtls/envoy/server.py new file mode 100644 index 00000000..7e44425f --- /dev/null +++ b/autocert/examples/hello-mtls/envoy/server.py @@ -0,0 +1,9 @@ +from flask import Flask +app = Flask(__name__) + +@app.route("/") +def hello(): + return "Hello World!\n" + +if __name__ == "__main__": + app.run(host='127.0.0.1', port=8080, debug=False) diff --git a/autocert/examples/hello-mtls/envoy/server.yaml b/autocert/examples/hello-mtls/envoy/server.yaml new file mode 100644 index 00000000..76b3c83a --- /dev/null +++ b/autocert/examples/hello-mtls/envoy/server.yaml @@ -0,0 +1,50 @@ +static_resources: + listeners: + - address: + socket_address: + address: 0.0.0.0 + port_value: 443 + filter_chains: + - filters: + - name: envoy.http_connection_manager + config: + codec_type: auto + stat_prefix: ingress_http + route_config: + name: hello + virtual_hosts: + - name: hello + domains: + - "hello-mtls.default.svc.cluster.local" + routes: + - match: + prefix: "/" + route: + cluster: hello-mTLS + http_filters: + - name: envoy.router + config: {} + tls_context: + common_tls_context: + tls_params: + tls_minimum_protocol_version: TLSv1_2 + 
tls_maximum_protocol_version: TLSv1_3 + cipher_suites: "[ECDHE-ECDSA-AES128-GCM-SHA256|ECDHE-ECDSA-CHACHA20-POLY1305]" + tls_certificates: + - certificate_chain: + filename: "/var/run/autocert.step.sm/site.crt" + private_key: + filename: "/var/run/autocert.step.sm/site.key" + validation_context: + trusted_ca: + filename: "/var/run/autocert.step.sm/root.crt" + require_client_certificate: true + clusters: + - name: hello-mTLS + connect_timeout: 0.25s + type: strict_dns + lb_policy: round_robin + hosts: + - socket_address: + address: 127.0.0.1 + port_value: 8080 \ No newline at end of file diff --git a/autocert/examples/hello-mtls/envoy/start-envoy.sh b/autocert/examples/hello-mtls/envoy/start-envoy.sh new file mode 100755 index 00000000..fe58a94e --- /dev/null +++ b/autocert/examples/hello-mtls/envoy/start-envoy.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +ulimit -n 65536 +/usr/local/bin/envoy -c /src/server.yaml --service-cluster hello-mTLS --restart-epoch $RESTART_EPOCH