Add envoy hello mTLS example.

Fixes smallstep/ca-component#144
5 years ago · 2e98febcd9
parent 23c60d5f3f
commit 2e98febcd9
10 changed files with 351 additions and 0 deletions
--- a/autocert/examples/hello-mtls/README.md
+++ b/autocert/examples/hello-mtls/README.md
@ -74,3 +74,11 @@ languages are appreciated!
  - [X] Restrict to safe ciphersuites and TLS versions
  - [ ] TLS stack configuration loaded from `step-ca`
  - [ ] Root certificate rotation
+
+[envoy/](envoy/)
+- [X] Server
+  - [X] mTLS (client authentication using internal root certificate)
+  - [X] Automatic certificate renewal
+  - [X] Restrict to safe ciphersuites and TLS versions
+  - [ ] TLS stack configuration loaded from `step-ca`
+  - [ ] Root certificate rotation
--- a/autocert/examples/hello-mtls/envoy/Dockerfile.server
+++ b/autocert/examples/hello-mtls/envoy/Dockerfile.server
@ -0,0 +1,21 @@
+# NOTE(review): the base image is untagged, so every build floats with whatever
+# `latest` currently is. Pin a specific envoy-alpine tag once the Envoy release
+# this example targets has been confirmed.
+FROM envoyproxy/envoy-alpine
+
+# python3 runs the Flask app and hot-restarter.py; inotify-tools provides the
+# inotifywait binary used by certwatch.sh. --no-cache fetches a fresh package
+# index and keeps it out of the image layer.
+RUN apk add --no-cache inotify-tools python3
+
+# Install the Flask dependencies before copying the rest of the sources so this
+# layer stays cached when only the scripts change. COPY creates /src.
+COPY requirements.txt /src/
+RUN pip3 install --no-cache-dir -r /src/requirements.txt
+
+# Envoy configuration plus the scripts that supervise and hot-restart it.
+COPY entrypoint.sh certwatch.sh hot-restarter.py start-envoy.sh server.yaml /src/
+
+# Flask app
+COPY server.py /src/
+
+# app, certificate watcher and envoy
+ENTRYPOINT ["/src/entrypoint.sh"]
+CMD ["python3", "/src/hot-restarter.py", "/src/start-envoy.sh"]
--- a/autocert/examples/hello-mtls/envoy/certwatch.sh
+++ b/autocert/examples/hello-mtls/envoy/certwatch.sh
@ -0,0 +1,6 @@
+#!/bin/sh
+
+# Watch the autocert-managed certificate and send SIGHUP to PID 1 every time
+# the file is modified.
+
+CERT=/var/run/autocert.step.sm/site.crt
+
+while true; do
+    # inotifywait exits 0 only when a watched event was seen. On failure
+    # (e.g. the certificate does not exist yet) back off instead of
+    # signalling, otherwise the loop would spin and flood PID 1 with SIGHUPs.
+    if inotifywait -e modify "$CERT"; then
+        kill -HUP 1
+    else
+        sleep 1
+    fi
+done
--- a/autocert/examples/hello-mtls/envoy/entrypoint.sh
+++ b/autocert/examples/hello-mtls/envoy/entrypoint.sh
@ -0,0 +1,10 @@
+#!/bin/sh
+
+# start hello world app
+python3 /src/server.py &
+
+# watch for the update of the cert and reload nginx
+/src/certwatch.sh &
+
+# Run docker CMD
+exec "$@"
--- a/autocert/examples/hello-mtls/envoy/hello-mtls.server.yaml
+++ b/autocert/examples/hello-mtls/envoy/hello-mtls.server.yaml
@ -0,0 +1,33 @@
+# ClusterIP Service that exposes the hello-mtls pods on port 443.
+apiVersion: v1
+kind: Service
+metadata:
+  labels: {app: hello-mtls}
+  name: hello-mtls
+spec:
+  type: ClusterIP
+  ports:
+  - port: 443
+    targetPort: 443
+  # Routes to every pod carrying the app=hello-mtls label (see the Deployment
+  # template below).
+  selector: {app: hello-mtls}
+
+---
+
+# Single-replica Deployment running the envoy-fronted hello-mtls server image.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: hello-mtls
+  labels: {app: hello-mtls}
+spec:
+  replicas: 1
+  selector: {matchLabels: {app: hello-mtls}}
+  template:
+    metadata:
+      annotations:
+        # NOTE(review): presumably autocert issues the pod certificate for this
+        # name -- confirm against the autocert documentation.
+        autocert.step.sm/name: hello-mtls.default.svc.cluster.local
+      labels: {app: hello-mtls}
+    spec:
+      containers:
+      - name: hello-mtls
+        image: hello-mtls-server-envoy:latest
+        # Never pull: the image must already be present on the node
+        # (e.g. built locally).
+        imagePullPolicy: Never
+        resources: {requests: {cpu: 10m, memory: 20Mi}}
--- a/autocert/examples/hello-mtls/envoy/hot-restarter.py
+++ b/autocert/examples/hello-mtls/envoy/hot-restarter.py
@ -0,0 +1,209 @@
+#!/usr/bin/env python
+from __future__ import print_function
+
+import os
+import signal
+import sys
+import time
+
+# The number of seconds to wait for children to gracefully exit after
+# propagating SIGTERM before force killing children.
+# NOTE: If using a shutdown mechanism such as runit's `force-stop` which sends
+# a KILL after a specified timeout period, it's important to ensure that this
+# constant is smaller than the KILL timeout
+TERM_WAIT_SECONDS = 30
+
+# Epoch number exported to the next child through the RESTART_EPOCH environment
+# variable; incremented by fork_and_exec() on every fork.
+restart_epoch = 0
+# PIDs of the children started by fork_and_exec() that have not been reaped yet.
+pid_list = []
+
+
+def term_all_children():
+  """ Iterate through all known child processes, send a TERM signal to each of
+  them, and then wait up to TERM_WAIT_SECONDS for them to exit gracefully,
+  exiting early if all children go away. If one or more children have not
+  exited after TERM_WAIT_SECONDS, they will be forcibly killed and the
+  restarter itself exits with status 1. """
+
+  # First uninstall the SIGCHLD handler so that we don't get called again.
+  signal.signal(signal.SIGCHLD, signal.SIG_DFL)
+
+  global pid_list
+  for pid in pid_list:
+    print("sending TERM to PID={}".format(pid))
+    try:
+      os.kill(pid, signal.SIGTERM)
+    except OSError:
+      print("error sending TERM to PID={} continuing".format(pid))
+
+  all_exited = False
+
+  # wait for TERM_WAIT_SECONDS seconds for children to exit cleanly
+  # (one non-blocking poll of every child per second)
+  retries = 0
+  while not all_exited and retries < TERM_WAIT_SECONDS:
+    # Iterate over a copy because exited children are removed from pid_list
+    # inside the loop.
+    for pid in list(pid_list):
+      ret_pid, exit_status = os.waitpid(pid, os.WNOHANG)
+      if ret_pid == 0 and exit_status == 0:
+        # the child is still running
+        continue
+
+      pid_list.remove(pid)
+
+    if len(pid_list) == 0:
+      all_exited = True
+    else:
+      retries += 1
+      time.sleep(1)
+
+  if all_exited:
+    print("all children exited cleanly")
+  else:
+    for pid in pid_list:
+      print("child PID={} did not exit cleanly, killing".format(pid))
+    force_kill_all_children()
+    sys.exit(1)  # error status because a child did not exit cleanly
+
+
+def force_kill_all_children():
+  """ Iterate through all known child processes and force kill them. Typically
+  term_all_children() should be attempted first to give child processes an
+  opportunity to clean up state before exiting """
+
+  global pid_list
+  for pid in pid_list:
+    print("force killing PID={}".format(pid))
+    try:
+      os.kill(pid, signal.SIGKILL)
+    except OSError:
+      print("error force killing PID={} continuing".format(pid))
+
+  pid_list = []
+
+
+def shutdown():
+  """ Attempt to gracefully shutdown all child Envoy processes and then exit.
+  See term_all_children() for further discussion. Exits with status 0 when
+  term_all_children() returns; that function exits with status 1 itself if it
+  had to force kill a child. """
+  term_all_children()
+  sys.exit(0)
+
+
+def sigterm_handler(signum, frame):
+  """ Handler for SIGTERM. Logs the signal and runs shutdown(). The signum and
+  frame arguments are supplied by the signal module and are not used. """
+  print("got SIGTERM")
+  shutdown()
+
+
+def sigint_handler(signum, frame):
+  """ Handler for SIGINT (ctrl-c). The same as the SIGTERM handler: logs the
+  signal and runs shutdown(). The signum and frame arguments are not used. """
+  print("got SIGINT")
+  shutdown()
+
+
+def sighup_handler(signum, frame):
+  """ Handler for SIGHUP. This signal is used to cause the restarter to fork and exec a new
+      child. The signum and frame arguments are not used. """
+
+  print("got SIGHUP")
+  fork_and_exec()
+
+
+def sigusr1_handler(signum, frame):
+  """ Handler for SIGUSR1. Propagate SIGUSR1 to all of the child processes """
+
+  global pid_list
+  for pid in pid_list:
+    print("sending SIGUSR1 to PID={}".format(pid))
+    try:
+      os.kill(pid, signal.SIGUSR1)
+    except OSError:
+      print("error in SIGUSR1 to PID={} continuing".format(pid))
+
+
+def sigchld_handler(signum, frame):
+  """ Handler for SIGCHLD. Iterates through all of our known child processes and figures out whether
+      the signal/exit was expected or not. Python doesn't have any of the native signal handlers
+      ability to get the child process info directly from the signal handler so we need to iterate
+      through all child processes and see what happened.
+
+      A child that exited with code 0 is simply dropped from pid_list. Any other exit (non-zero
+      code, killed by a signal, or an unrecognised status) causes all remaining children to be
+      force killed. The restarter exits once no children are left: status 1 after an abnormal
+      exit, status 0 otherwise. """
+
+  print("got SIGCHLD")
+
+  kill_all_and_exit = False
+  global pid_list
+  # Work on a copy because reaped children are removed from pid_list in the loop.
+  pid_list_copy = list(pid_list)
+  for pid in pid_list_copy:
+    # Non-blocking check of this child's state.
+    ret_pid, exit_status = os.waitpid(pid, os.WNOHANG)
+    if ret_pid == 0 and exit_status == 0:
+      # This child is still running.
+      continue
+
+    pid_list.remove(pid)
+
+    # Now we see how the child exited.
+    if os.WIFEXITED(exit_status):
+      exit_code = os.WEXITSTATUS(exit_status)
+      print("PID={} exited with code={}".format(ret_pid, exit_code))
+      if exit_code == 0:
+        # Normal exit. We assume this was on purpose.
+        pass
+      else:
+        # Something bad happened. We need to tear everything down so that whoever started the
+        # restarter can know about this situation and restart the whole thing.
+        kill_all_and_exit = True
+    elif os.WIFSIGNALED(exit_status):
+      print("PID={} was killed with signal={}".format(ret_pid, os.WTERMSIG(exit_status)))
+      kill_all_and_exit = True
+    else:
+      # Neither a normal exit nor death by signal: treat as abnormal.
+      kill_all_and_exit = True
+
+  if kill_all_and_exit:
+    print("Due to abnormal exit, force killing all child processes and exiting")
+
+    # First uninstall the SIGCHLD handler so that we don't get called again.
+    signal.signal(signal.SIGCHLD, signal.SIG_DFL)
+
+    # This also empties pid_list, so the exit below is always taken.
+    force_kill_all_children()
+
+  # Our last child died, so we have no purpose. Exit.
+  if not pid_list:
+    print("exiting due to lack of child processes")
+    sys.exit(1 if kill_all_and_exit else 0)
+
+
+def fork_and_exec():
+  """ This routine forks and execs a new child process and keeps track of its PID. Before we fork,
+      set the current restart epoch in an env variable that processes can read if they care. """
+
+  global restart_epoch
+  os.environ['RESTART_EPOCH'] = str(restart_epoch)
+  print("forking and execing new child process at epoch {}".format(restart_epoch))
+  restart_epoch += 1
+
+  child_pid = os.fork()
+  if child_pid == 0:
+    # Child process
+    os.execl(sys.argv[1], sys.argv[1])
+  else:
+    # Parent process
+    print("forked new child process with PID={}".format(child_pid))
+    pid_list.append(child_pid)
+
+
+def main():
+  """ Script main. This script is designed so that a process watcher like runit or monit can watch
+      this process and take corrective action if it ever goes away. """
+
+  print("starting hot-restarter with target: {}".format(sys.argv[1]))
+
+  signal.signal(signal.SIGTERM, sigterm_handler)
+  signal.signal(signal.SIGINT, sigint_handler)
+  signal.signal(signal.SIGHUP, sighup_handler)
+  signal.signal(signal.SIGCHLD, sigchld_handler)
+  signal.signal(signal.SIGUSR1, sigusr1_handler)
+
+  # Start the first child process and then go into an endless loop since everything else happens via
+  # signals.
+  fork_and_exec()
+  while True:
+    time.sleep(60)
+
+
+if __name__ == '__main__':
+  main()
--- a/autocert/examples/hello-mtls/envoy/requirements.txt
+++ b/autocert/examples/hello-mtls/envoy/requirements.txt
@ -0,0 +1 @@
+Flask
--- a/autocert/examples/hello-mtls/envoy/server.py
+++ b/autocert/examples/hello-mtls/envoy/server.py
@ -0,0 +1,9 @@
+# Minimal Flask "hello world" application served over plain HTTP.
+from flask import Flask
+app = Flask(__name__)
+
+@app.route("/")
+def hello():
+    # Response body for requests to "/".
+    return "Hello World!\n"
+
+if __name__ == "__main__":
+    # Bind to loopback only, on port 8080, with Flask debug mode off.
+    app.run(host='127.0.0.1', port=8080, debug=False)
--- a/autocert/examples/hello-mtls/envoy/server.yaml
+++ b/autocert/examples/hello-mtls/envoy/server.yaml
@ -0,0 +1,50 @@
+# Envoy static configuration: one TLS listener on :443 that requires a client
+# certificate and routes all requests to a local upstream on 127.0.0.1:8080.
+static_resources:
+  listeners:
+  - address:
+      socket_address:
+        address: 0.0.0.0
+        port_value: 443
+    filter_chains:
+    - filters:
+      - name: envoy.http_connection_manager
+        config:
+          codec_type: auto
+          stat_prefix: ingress_http
+          route_config:
+            name: hello
+            virtual_hosts:
+            - name: hello
+              # Only requests for this host name are routed.
+              domains:
+              - "hello-mtls.default.svc.cluster.local"
+              routes:
+              - match:
+                  prefix: "/"
+                route:
+                  cluster: hello-mTLS
+          http_filters:
+          - name: envoy.router
+            config: {}
+      tls_context:
+        common_tls_context:
+          tls_params:
+            tls_minimum_protocol_version: TLSv1_2
+            tls_maximum_protocol_version: TLSv1_3
+            # NOTE(review): written as a single string containing one
+            # bracketed group -- confirm Envoy accepts a scalar here, since
+            # cipher_suites is documented as a list.
+            cipher_suites: "[ECDHE-ECDSA-AES128-GCM-SHA256|ECDHE-ECDSA-CHACHA20-POLY1305]"
+          # Server certificate and key read from the autocert.step.sm directory.
+          tls_certificates:
+            - certificate_chain: 
+                filename: "/var/run/autocert.step.sm/site.crt"
+              private_key:
+                filename: "/var/run/autocert.step.sm/site.key"
+          # Client certificates are validated against this root certificate.
+          validation_context:
+            trusted_ca:
+              filename: "/var/run/autocert.step.sm/root.crt"
+        # mTLS: connections without a client certificate are rejected.
+        require_client_certificate: true
+  clusters:
+  # Upstream cluster with a single endpoint on loopback port 8080.
+  - name: hello-mTLS
+    connect_timeout: 0.25s
+    type: strict_dns
+    lb_policy: round_robin
+    hosts:
+    - socket_address:
+        address: 127.0.0.1
+        port_value: 8080
--- a/autocert/examples/hello-mtls/envoy/start-envoy.sh
+++ b/autocert/examples/hello-mtls/envoy/start-envoy.sh
@ -0,0 +1,4 @@
+#!/bin/sh
+
+ulimit -n 65536
+/usr/local/bin/envoy -c /src/server.yaml --service-cluster hello-mTLS --restart-epoch $RESTART_EPOCH