diff --git a/NEWS b/NEWS
index 6e620c6c..3360a86d 100644
--- a/NEWS
+++ b/NEWS
@@ -48,6 +48,9 @@ lnav v0.10.2:
     * Added a "glob" property to search tables defined in log formats
       to constrain searches to log messages from files that have a
       matching log_path value.
+    * Initial indexing of large files should be faster.  Decompression
+      and searching for line-endings are now pipelined, so they happen
+      in a thread that is separate from the regular expression matcher.
 
   Breaking Changes:
     * Added a 'language' column to the lnav_view_filters table that
@@ -77,7 +80,6 @@ lnav v0.10.2:
     * Toggling enabled/disabled filters when there is a SQL expression
       no longer causes a crash.
     * Fix a crash related to long lines that are word wrapped.
-
 
 lnav v0.10.1:
   Features:
diff --git a/release/loggen.py b/release/loggen.py
index 955d1b88..6de2ea71 100755
--- a/release/loggen.py
+++ b/release/loggen.py
@@ -156,8 +156,16 @@ while True:
     for fname, gen in FILES:
         for i in range(random.randrange(0, 4)):
             with open(fname, "a+") as fp:
-                fp.write(next(gen))
-                #if random.uniform(0.0, 1.0) < 0.010:
+                if random.uniform(0.0, 1.0) < 0.01:
+                    line = next(gen)
+                    prefix = line[:50]
+                    suffix = line[50:]
+                    fp.write(prefix)
+                    time.sleep(random.uniform(0.5, 0.6))
+                    fp.write(suffix)
+                else:
+                    fp.write(next(gen))
+                # if random.uniform(0.0, 1.0) < 0.010:
                 #    fp.truncate(0)
     time.sleep(random.uniform(0.01, 0.02))
     #if random.uniform(0.0, 1.0) < 0.001:
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 2bc52c37..2570e352 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -506,6 +506,8 @@ add_library(
         third-party/intervaltree/IntervalTree.h
         third-party/md4c/md4c.h
+
+        third-party/robin_hood/robin_hood.h
 )
 
 set(lnav_SRCS lnav.cc)
diff --git a/src/Makefile.am b/src/Makefile.am
index a9238633..df4c0208 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -355,6 +355,7 @@ THIRD_PARTY_SRCS = \
 	third-party/doctest-root/doctest/doctest.h \
 	third-party/intervaltree/IntervalTree.h \
 	third-party/md4c/md4c.c \
+	third-party/robin_hood/robin_hood.h \
 	third-party/sqlite/ext/dbdump.c \
 	third-party/sqlite/ext/series.c
diff --git a/src/base/auto_mem.hh b/src/base/auto_mem.hh
index d6b7496c..5216261c 100644
--- a/src/base/auto_mem.hh
+++ b/src/base/auto_mem.hh
@@ -149,16 +149,29 @@ private:
 
 class auto_buffer {
 public:
-    static auto_buffer alloc(size_t size)
+    static auto_buffer alloc(size_t capacity)
     {
-        return auto_buffer{(char*) malloc(size), size};
+        return auto_buffer{(char*) malloc(capacity), capacity};
     }
 
+    static auto_buffer from(const char* mem, size_t size)
+    {
+        auto retval = alloc(size);
+
+        retval.resize(size);
+        memcpy(retval.in(), mem, size);
+        return retval;
+    }
+
+    auto_buffer(const auto_buffer&) = delete;
+
     auto_buffer(auto_buffer&& other) noexcept
-        : ab_buffer(other.ab_buffer), ab_size(other.ab_size)
+        : ab_buffer(other.ab_buffer), ab_size(other.ab_size),
+          ab_capacity(other.ab_capacity)
     {
         other.ab_buffer = nullptr;
         other.ab_size = 0;
+        other.ab_capacity = 0;
    }
 
    ~auto_buffer()
@@ -166,48 +179,128 @@ public:
         free(this->ab_buffer);
         this->ab_buffer = nullptr;
         this->ab_size = 0;
+        this->ab_capacity = 0;
+    }
+
+    auto_buffer& operator=(const auto_buffer&) = delete;
+
+    auto_buffer& operator=(auto_buffer&& other) noexcept
+    {
+        free(this->ab_buffer);
+        this->ab_buffer = other.ab_buffer;
+        this->ab_size = other.ab_size;
+        this->ab_capacity = other.ab_capacity;
+        other.ab_buffer = nullptr;
+        other.ab_size = 0;
+        other.ab_capacity = 0;
+        return *this;
+    }
+
+    void swap(auto_buffer& other)
+    {
+        std::swap(this->ab_buffer, other.ab_buffer);
+        std::swap(this->ab_size, other.ab_size);
+        std::swap(this->ab_capacity, other.ab_capacity);
     }
 
     char* in() { return this->ab_buffer; }
 
+    char* at(size_t offset) { return &this->ab_buffer[offset]; }
+
+    const char* at(size_t offset) const { return &this->ab_buffer[offset]; }
+
+    char* begin() { return this->ab_buffer; }
+
+    const char* begin() const { return this->ab_buffer; }
+
+    std::reverse_iterator<char*> rbegin()
+    {
+        return std::reverse_iterator<char*>(this->end());
+    }
+
+    std::reverse_iterator<const char*> rbegin() const
+    {
+        return std::reverse_iterator<const char*>(this->end());
+    }
+
+    char* end() { return &this->ab_buffer[this->ab_size]; }
+
+    const char* end() const { return &this->ab_buffer[this->ab_size]; }
+
+    std::reverse_iterator<char*> rend()
+    {
+        return std::reverse_iterator<char*>(this->begin());
+    }
+
+    std::reverse_iterator<const char*> rend() const
+    {
+        return std::reverse_iterator<const char*>(this->begin());
+    }
+
     std::pair<char*, size_t> release()
     {
         auto retval = std::make_pair(this->ab_buffer, this->ab_size);
 
         this->ab_buffer = nullptr;
         this->ab_size = 0;
+        this->ab_capacity = 0;
         return retval;
     }
 
     size_t size() const { return this->ab_size; }
 
-    void expand_by(size_t amount)
+    bool empty() const { return this->ab_size == 0; }
+
+    bool full() const { return this->ab_size == this->ab_capacity; }
+
+    size_t capacity() const { return this->ab_capacity; }
+
+    size_t available() const { return this->ab_capacity - this->ab_size; }
+
+    void clear() { this->resize(0); }
+
+    auto_buffer& resize(size_t new_size)
     {
-        if (amount == 0) {
+        assert(new_size <= this->ab_capacity);
+
+        this->ab_size = new_size;
+        return *this;
+    }
+
+    auto_buffer& resize_by(ssize_t amount)
+    {
+        return this->resize(this->ab_size + amount);
+    }
+
+    void expand_to(size_t new_capacity)
+    {
+        if (new_capacity <= this->ab_capacity) {
             return;
         }
 
-        auto new_size = this->ab_size + amount;
-        auto new_buffer = (char*) realloc(this->ab_buffer, new_size);
+        auto* new_buffer = (char*) realloc(this->ab_buffer, new_capacity);
 
         if (new_buffer == nullptr) {
             throw std::bad_alloc();
         }
 
         this->ab_buffer = new_buffer;
-        this->ab_size = new_size;
+        this->ab_capacity = new_capacity;
    }
 
-    auto_buffer& shrink_to(size_t new_size)
+    void expand_by(size_t amount)
    {
-        this->ab_size = new_size;
-        return *this;
+        if (amount == 0) {
+            return;
+        }
+
+        this->expand_to(this->ab_capacity + amount);
    }
 
 private:
-    auto_buffer(char* buffer, size_t size) : ab_buffer(buffer), ab_size(size) {}
+    auto_buffer(char* buffer, size_t capacity)
+        : ab_buffer(buffer), ab_capacity(capacity)
+    {
+    }
 
     char* ab_buffer;
-    size_t ab_size;
+    size_t ab_size{0};
+    size_t ab_capacity;
 };
 
 struct text_auto_buffer {
diff --git a/src/base/date_time_scanner.cc b/src/base/date_time_scanner.cc
index bca6dfbc..346f5bba 100644
--- a/src/base/date_time_scanner.cc
+++ b/src/base/date_time_scanner.cc
@@ -154,9 +154,25 @@ date_time_scanner::scan(const char* time_dest,
                 this->to_localtime(gmt, *tm_out);
             }
 
-            tv_out.tv_sec = tm2sec(&tm_out->et_tm);
+            const auto& last_tm = this->dts_last_tm.et_tm;
+            if (last_tm.tm_year == tm_out->et_tm.tm_year
+                && last_tm.tm_mon == tm_out->et_tm.tm_mon
+                && last_tm.tm_mday == tm_out->et_tm.tm_mday
+                && last_tm.tm_hour == tm_out->et_tm.tm_hour
+                && last_tm.tm_min == tm_out->et_tm.tm_min)
+            {
+                const auto sec_diff = tm_out->et_tm.tm_sec - last_tm.tm_sec;
+
+                // log_debug("diff %d", sec_diff);
+                tv_out = this->dts_last_tv;
+                tv_out.tv_sec += sec_diff;
+                tm_out->et_tm.tm_wday = last_tm.tm_wday;
+            } else {
+                // log_debug("doing tm2sec");
+                tv_out.tv_sec = tm2sec(&tm_out->et_tm);
+                secs2wday(tv_out, &tm_out->et_tm);
+            }
             tv_out.tv_usec = tm_out->et_nsec / 1000;
-            secs2wday(tv_out, &tm_out->et_tm);
 
             this->dts_fmt_lock = curr_time_fmt;
this->dts_fmt_len = retval - time_dest; @@ -211,6 +227,11 @@ date_time_scanner::scan(const char* time_dest, retval = nullptr; } + if (retval != nullptr) { + this->dts_last_tm = *tm_out; + this->dts_last_tv = tv_out; + } + if (retval != nullptr && static_cast(retval - time_dest) < time_len) { /* Try to pull out the milli/micro-second value. */ diff --git a/src/base/date_time_scanner.hh b/src/base/date_time_scanner.hh index 34a2ddc9..c415c6d7 100644 --- a/src/base/date_time_scanner.hh +++ b/src/base/date_time_scanner.hh @@ -55,6 +55,8 @@ struct date_time_scanner { this->dts_base_tm = exttm{}; this->dts_fmt_lock = -1; this->dts_fmt_len = -1; + this->dts_last_tv = timeval{}; + this->dts_last_tm = exttm{}; } /** @@ -70,6 +72,8 @@ struct date_time_scanner { { this->dts_base_time = base_time; localtime_r(&base_time, &this->dts_base_tm.et_tm); + this->dts_last_tm = exttm{}; + this->dts_last_tv = timeval{}; } /** @@ -87,6 +91,8 @@ struct date_time_scanner { struct exttm dts_base_tm; int dts_fmt_lock{-1}; int dts_fmt_len{-1}; + struct exttm dts_last_tm {}; + struct timeval dts_last_tv {}; time_t dts_local_offset_cache{0}; time_t dts_local_offset_valid{0}; time_t dts_local_offset_expiry{0}; diff --git a/src/base/intern_string.hh b/src/base/intern_string.hh index 0f7c49f7..afcec59e 100644 --- a/src/base/intern_string.hh +++ b/src/base/intern_string.hh @@ -333,6 +333,12 @@ struct string_fragment { int sf_end; }; +inline bool +operator==(const std::string& left, const string_fragment& right) +{ + return right == left; +} + inline bool operator<(const char* left, const string_fragment& right) { diff --git a/src/base/lnav.gzip.cc b/src/base/lnav.gzip.cc index e50fc12a..903ce750 100644 --- a/src/base/lnav.gzip.cc +++ b/src/base/lnav.gzip.cc @@ -56,7 +56,7 @@ compress(const void* input, size_t len) zs.opaque = Z_NULL; zs.avail_in = (uInt) len; zs.next_in = (Bytef*) input; - zs.avail_out = (uInt) retval.size(); + zs.avail_out = (uInt) retval.capacity(); zs.next_out = (Bytef*) retval.in(); zs.total_out = 0; @@ -76,7 +76,7 @@ compress(const void* input, size_t len) return Err(fmt::format( FMT_STRING("unable to finalize compression -- {}"), zError(rc))); } - return Ok(std::move(retval.shrink_to(zs.total_out))); + return Ok(std::move(retval.resize(zs.total_out))); } Result @@ -107,7 +107,7 @@ uncompress(const std::string& src, const void* buffer, size_t size) } strm.next_out = (Bytef*) (uncomp.in() + strm.total_out); - strm.avail_out = uncomp.size() - strm.total_out; + strm.avail_out = uncomp.capacity() - strm.total_out; // Inflate another chunk. err = inflate(&strm, Z_SYNC_FLUSH); @@ -127,7 +127,7 @@ uncompress(const std::string& src, const void* buffer, size_t size) strm.msg ? 
strm.msg : zError(err))); } - return Ok(std::move(uncomp.shrink_to(strm.total_out))); + return Ok(std::move(uncomp.resize(strm.total_out))); } } // namespace gzip diff --git a/src/base/lnav.gzip.tests.cc b/src/base/lnav.gzip.tests.cc index 28a61bea..511a1c86 100644 --- a/src/base/lnav.gzip.tests.cc +++ b/src/base/lnav.gzip.tests.cc @@ -53,3 +53,15 @@ TEST_CASE("lnav::gzip::uncompress") == "unable to uncompress: garbage -- incorrect header check"); } } + +TEST_CASE("lnav::gzip::roundtrip") +{ + const char msg[] = "Hello, World!"; + + auto c_res = lnav::gzip::compress(msg, sizeof(msg)); + auto buf = c_res.unwrap(); + auto u_res = lnav::gzip::uncompress("test", buf.in(), buf.size()); + auto buf2 = u_res.unwrap(); + + CHECK(std::string(msg) == std::string(buf2.in())); +} diff --git a/src/base/time_util.cc b/src/base/time_util.cc index a2898eb3..31805795 100644 --- a/src/base/time_util.cc +++ b/src/base/time_util.cc @@ -114,8 +114,9 @@ secs2wday(const struct timeval& tv, struct tm* res) } /* compute day of week */ - if ((res->tm_wday = ((EPOCH_WDAY + days) % DAYSPERWEEK)) < 0) + if ((res->tm_wday = ((EPOCH_WDAY + days) % DAYSPERWEEK)) < 0) { res->tm_wday += DAYSPERWEEK; + } } struct tm* diff --git a/src/hist_source.hh b/src/hist_source.hh index 89aa86d2..efbc71c1 100644 --- a/src/hist_source.hh +++ b/src/hist_source.hh @@ -36,6 +36,7 @@ #include #include #include +#include #include #include "base/lnav_log.hh" @@ -300,7 +301,7 @@ protected: bool sbc_do_stacking{true}; unsigned long sbc_left{0}, sbc_right{0}; std::vector sbc_idents; - std::map sbc_ident_lookup; + std::unordered_map sbc_ident_lookup; show_state sbc_show_state{show_all()}; }; diff --git a/src/line_buffer.cc b/src/line_buffer.cc index 99f437c8..1fe44cdc 100644 --- a/src/line_buffer.cc +++ b/src/line_buffer.cc @@ -47,7 +47,10 @@ # include "simdutf8check.h" #endif +#include "base/injector.bind.hh" +#include "base/injector.hh" #include "base/is_utf8.hh" +#include "base/isc.hh" #include "base/math_util.hh" #include "fmtlib/fmt/format.h" #include "line_buffer.hh" @@ -56,6 +59,21 @@ static const ssize_t INITIAL_REQUEST_SIZE = 16 * 1024; static const ssize_t DEFAULT_INCREMENT = 128 * 1024; static const ssize_t MAX_COMPRESSED_BUFFER_SIZE = 32 * 1024 * 1024; +class io_looper : public isc::service {}; + +struct io_looper_tag {}; + +static auto bound_io = injector::bind_multiple() + .add_singleton(); + +namespace injector { +template<> +void +force_linking(io_looper_tag anno) +{ +} +} // namespace injector + /* * XXX REMOVE ME * @@ -284,6 +302,7 @@ int line_buffer::gz_indexed::read(void* buf, size_t offset, size_t size) { if (offset != this->strm.total_out) { + // log_debug("doing seek! 
%d %d", offset, this->strm.total_out); this->seek(offset); } @@ -293,15 +312,7 @@ line_buffer::gz_indexed::read(void* buf, size_t offset, size_t size) } line_buffer::line_buffer() - : lb_bz_file(false), lb_compressed_offset(0), lb_file_size(-1), - lb_file_offset(0), lb_file_time(0), lb_buffer_size(0), - lb_buffer_max(DEFAULT_LINE_BUFFER_SIZE), lb_seekable(false), - lb_last_line_offset(-1) { - if ((this->lb_buffer = (char*) malloc(this->lb_buffer_max)) == nullptr) { - throw std::bad_alloc(); - } - ensure(this->invariant()); } @@ -319,8 +330,12 @@ line_buffer::set_fd(auto_fd& fd) { file_off_t newoff = 0; - if (this->lb_gz_file) { - this->lb_gz_file.close(); + { + safe::WriteAccess gi(this->lb_gz_file); + + if (*gi) { + gi->close(); + } } if (this->lb_bz_file) { @@ -350,7 +365,8 @@ line_buffer::set_fd(auto_fd& fd) close(gzfd); throw error(errno); } - lb_gz_file.open(gzfd); + lb_gz_file.writeAccess()->open(gzfd); + this->lb_compressed = true; this->lb_file_time = read_le32((const unsigned char*) &gz_id[4]); if (this->lb_file_time < 0) { @@ -366,6 +382,7 @@ line_buffer::set_fd(auto_fd& fd) throw error(errno); } this->lb_bz_file = true; + this->lb_compressed = true; /* * Loading data from a bzip2 file is pretty slow, so we try @@ -380,8 +397,9 @@ line_buffer::set_fd(auto_fd& fd) this->lb_seekable = true; } } + log_debug("newoff %d", newoff); this->lb_file_offset = newoff; - this->lb_buffer_size = 0; + this->lb_buffer.clear(); this->lb_fd = std::move(fd); ensure(this->invariant()); @@ -390,24 +408,10 @@ line_buffer::set_fd(auto_fd& fd) void line_buffer::resize_buffer(size_t new_max) { - require(this->lb_bz_file || this->lb_gz_file - || new_max <= MAX_LINE_BUFFER_SIZE); - - if (new_max > (size_t) this->lb_buffer_max) { - char *tmp, *old; - + if (new_max > (size_t) this->lb_buffer.capacity()) { /* Still need more space, try a realloc. */ - old = this->lb_buffer.release(); this->lb_share_manager.invalidate_refs(); - tmp = (char*) realloc(old, new_max); - if (tmp != NULL) { - this->lb_buffer = tmp; - this->lb_buffer_max = new_max; - } else { - this->lb_buffer = old; - - throw error(ENOMEM); - } + this->lb_buffer.expand_to(new_max); } } @@ -416,7 +420,9 @@ line_buffer::ensure_available(file_off_t start, ssize_t max_length) { ssize_t prefill, available; - require(max_length <= MAX_LINE_BUFFER_SIZE); + require(this->lb_compressed || max_length <= MAX_LINE_BUFFER_SIZE); + + // log_debug("ensure avail %d %d", start, max_length); if (this->lb_file_size != -1) { if (start + (file_off_t) max_length > this->lb_file_size) { @@ -429,7 +435,7 @@ line_buffer::ensure_available(file_off_t start, ssize_t max_length) * after. */ if (start < this->lb_file_offset - || start > (file_off_t) (this->lb_file_offset + this->lb_buffer_size)) + || start > (file_off_t) (this->lb_file_offset + this->lb_buffer.size())) { /* * The request is outside the cached range, need to reload the @@ -437,20 +443,22 @@ line_buffer::ensure_available(file_off_t start, ssize_t max_length) */ this->lb_share_manager.invalidate_refs(); prefill = 0; - this->lb_buffer_size = 0; + this->lb_buffer.clear(); if ((this->lb_file_size != (ssize_t) -1) - && (start + this->lb_buffer_max > this->lb_file_size)) + && (start + this->lb_buffer.capacity() > this->lb_file_size)) { + require(start < this->lb_file_size); /* * If the start is near the end of the file, move the offset back a * bit so we can get more of the file in the cache. 
*/ this->lb_file_offset = this->lb_file_size - std::min(this->lb_file_size, - (file_ssize_t) this->lb_buffer_max); + (file_ssize_t) this->lb_buffer.capacity()); } else { this->lb_file_offset = start; } + log_debug("adjusted file offset %d %d", start, this->lb_file_offset); } else { /* The request is in the cached range. Record how much extra data is in * the buffer before the requested range. @@ -458,29 +466,205 @@ line_buffer::ensure_available(file_off_t start, ssize_t max_length) prefill = start - this->lb_file_offset; } require(this->lb_file_offset <= start); - require(prefill <= this->lb_buffer_size); + require(prefill <= this->lb_buffer.size()); - available = this->lb_buffer_max - (start - this->lb_file_offset); - require(available <= this->lb_buffer_max); + available = this->lb_buffer.capacity() - (start - this->lb_file_offset); + require(available <= this->lb_buffer.capacity()); if (max_length > available) { + // log_debug("need more space!"); /* * Need more space, move any existing data to the front of the * buffer. */ this->lb_share_manager.invalidate_refs(); - this->lb_buffer_size -= prefill; + this->lb_buffer.resize_by(-prefill); this->lb_file_offset += prefill; - memmove(&this->lb_buffer[0], - &this->lb_buffer[prefill], - this->lb_buffer_size); + // log_debug("adjust file offset for prefill %d", this->lb_file_offset); + memmove(this->lb_buffer.at(0), + this->lb_buffer.at(prefill), + this->lb_buffer.size()); - available = this->lb_buffer_max - (start - this->lb_file_offset); + available = this->lb_buffer.capacity() - (start - this->lb_file_offset); if (max_length > available) { this->resize_buffer(roundup_size(max_length, DEFAULT_INCREMENT)); } } + this->lb_line_starts.clear(); + this->lb_line_is_utf.clear(); +} + +bool +line_buffer::load_next_buffer() +{ + // log_debug("loader here!"); + auto retval = false; + auto start = this->lb_loader_file_offset.value(); + ssize_t rc = 0; + safe::WriteAccess gi(this->lb_gz_file); + + // log_debug("BEGIN preload read"); + /* ... read in the new data. */ + if (*gi) { + if (this->lb_file_size != (ssize_t) -1 && this->in_range(start) + && this->in_range(this->lb_file_size - 1)) + { + rc = 0; + } else { + // log_debug("decomp start"); + rc = gi->read(this->lb_alt_buffer.value().end(), + start + this->lb_alt_buffer.value().size(), + this->lb_alt_buffer.value().available()); + this->lb_compressed_offset = gi->get_source_offset(); + if (rc != -1 && (rc < this->lb_alt_buffer.value().available()) + && (start + this->lb_alt_buffer.value().size() + rc + > this->lb_file_size)) + { + this->lb_file_size + = (start + this->lb_alt_buffer.value().size() + rc); + } + // log_debug("decomp end"); + } + } +#ifdef HAVE_BZLIB_H + else if (this->lb_bz_file) + { + if (this->lb_file_size != (ssize_t) -1 + && (((ssize_t) start >= this->lb_file_size) + || (this->in_range(start) + && this->in_range(this->lb_file_size - 1)))) + { + rc = 0; + } else { + lock_hack::guard guard; + char scratch[32 * 1024]; + BZFILE* bz_file; + file_off_t seek_to; + int bzfd; + + /* + * Unfortunately, there is no bzseek, so we need to reopen the + * file every time we want to do a read. 
+ */ + bzfd = dup(this->lb_fd); + if (lseek(this->lb_fd, 0, SEEK_SET) < 0) { + close(bzfd); + throw error(errno); + } + if ((bz_file = BZ2_bzdopen(bzfd, "r")) == NULL) { + close(bzfd); + if (errno == 0) { + throw std::bad_alloc(); + } else { + throw error(errno); + } + } + + seek_to = start + this->lb_alt_buffer.value().size(); + while (seek_to > 0) { + int count; + + count = BZ2_bzread(bz_file, + scratch, + std::min((size_t) seek_to, sizeof(scratch))); + seek_to -= count; + } + rc = BZ2_bzread(bz_file, + this->lb_alt_buffer->end(), + this->lb_alt_buffer->available()); + this->lb_compressed_offset = lseek(bzfd, 0, SEEK_SET); + BZ2_bzclose(bz_file); + + if (rc != -1 && (rc < (this->lb_alt_buffer.value().available())) && + (start + this->lb_alt_buffer.value().size() + rc > this->lb_file_size)) { + this->lb_file_size + = (start + this->lb_alt_buffer.value().size() + rc); + } + } + } +#endif + else + { + rc = pread(this->lb_fd, + this->lb_alt_buffer.value().end(), + this->lb_alt_buffer.value().available(), + start + this->lb_alt_buffer.value().size()); + } + // XXX For some reason, cygwin is giving us a bogus return value when + // up to the end of the file. + if (rc > (this->lb_alt_buffer.value().available())) { + rc = -1; +#ifdef ENODATA + errno = ENODATA; +#else + errno = EAGAIN; +#endif + } + switch (rc) { + case 0: + if (start < (file_off_t) this->lb_file_size) { + retval = true; + } + break; + + case (ssize_t) -1: + switch (errno) { +#ifdef ENODATA + /* Cygwin seems to return this when pread reaches the end of + * the file. */ + case ENODATA: +#endif + case EINTR: + case EAGAIN: + break; + + default: + throw error(errno); + } + break; + + default: + this->lb_alt_buffer.value().resize_by(rc); + retval = true; + break; + } + // log_debug("END preload read"); + + if (start > this->lb_last_line_offset) { + auto* line_start = this->lb_alt_buffer.value().begin(); + + do { + const char* msg = nullptr; + int faulty_bytes = 0; + bool valid_utf = true; + char* lf = nullptr; + + auto before = line_start - this->lb_alt_buffer->begin(); + auto remaining = this->lb_alt_buffer.value().size() - before; + auto utf8_end = is_utf8( + (unsigned char*) line_start, remaining, &msg, &faulty_bytes); + if (msg != nullptr) { + lf = (char*) memchr(line_start, '\n', remaining); + utf8_end = lf - line_start; + valid_utf = false; + } + if (utf8_end >= 0) { + lf = line_start + utf8_end; + } + this->lb_alt_line_starts.emplace_back(before); + this->lb_alt_line_is_utf.emplace_back(valid_utf); + + if (lf != nullptr) { + line_start = lf + 1; + } else { + line_start = nullptr; + } + } while (line_start != nullptr + && line_start < this->lb_alt_buffer->end()); + } + + return retval; } bool @@ -490,34 +674,110 @@ line_buffer::fill_range(file_off_t start, ssize_t max_length) require(start >= 0); + // log_debug("fill range %d %d", start, max_length); +#if 0 + log_debug("(%p) fill range %d %d (%d) %d", + this, + start, + max_length, + this->lb_file_offset, + this->lb_buffer.size()); +#endif + if (this->lb_loader_future.valid() + && start >= this->lb_loader_file_offset.value()) + { +#if 0 + log_debug("getting preload! 
%d %d", + start, + this->lb_loader_file_offset.value()); +#endif + nonstd::optional wait_start; + + if (this->lb_loader_future.wait_for(std::chrono::seconds(0)) + != std::future_status::ready) + { + wait_start + = nonstd::make_optional(std::chrono::system_clock::now()); + } + retval = this->lb_loader_future.get(); + if (wait_start) { + auto diff = std::chrono::system_clock::now() - wait_start.value(); + log_debug("wait done! %d", diff.count()); + } + // log_debug("got preload"); + this->lb_loader_future = {}; + this->lb_file_offset = this->lb_loader_file_offset.value(); + this->lb_loader_file_offset = nonstd::nullopt; + this->lb_buffer.swap(this->lb_alt_buffer.value()); + this->lb_alt_buffer.value().clear(); + this->lb_line_starts = std::move(this->lb_alt_line_starts); + this->lb_alt_line_starts.clear(); + this->lb_line_is_utf = std::move(this->lb_alt_line_is_utf); + this->lb_alt_line_is_utf.clear(); + } if (this->in_range(start) && this->in_range(start + max_length - 1)) { /* Cache already has the data, nothing to do. */ retval = true; + if (this->lb_seekable && this->lb_buffer.full() + && !this->lb_loader_file_offset) { + // log_debug("loader available start=%d", start); + auto last_lf_iter = std::find( + this->lb_buffer.rbegin(), this->lb_buffer.rend(), '\n'); + if (last_lf_iter != this->lb_buffer.rend()) { + auto usable_size + = std::distance(last_lf_iter, this->lb_buffer.rend()); + // log_debug("found linefeed %d", usable_size); + if (!this->lb_alt_buffer) { + // log_debug("allocating new buffer!"); + this->lb_alt_buffer + = auto_buffer::alloc(this->lb_buffer.capacity()); + } + this->lb_alt_buffer->resize(this->lb_buffer.size() + - usable_size); + memcpy(this->lb_alt_buffer.value().begin(), + this->lb_buffer.at(usable_size), + this->lb_alt_buffer->size()); + this->lb_loader_file_offset + = this->lb_file_offset + usable_size; +#if 0 + log_debug("load offset %d", + this->lb_loader_file_offset.value()); + log_debug("launch loader"); +#endif + auto prom = std::make_shared>(); + this->lb_loader_future = prom->get_future(); + isc::to().send( + [this, prom](auto& ioloop) mutable { + prom->set_value(this->load_next_buffer()); + }); + } + } } else if (this->lb_fd != -1) { ssize_t rc; /* Make sure there is enough space, then */ this->ensure_available(start, max_length); + safe::WriteAccess gi(this->lb_gz_file); + /* ... read in the new data. 
*/ - if (this->lb_gz_file) { + if (*gi) { + // log_debug("old decomp start"); if (this->lb_file_size != (ssize_t) -1 && this->in_range(start) && this->in_range(this->lb_file_size - 1)) { rc = 0; } else { - rc = this->lb_gz_file.read( - &this->lb_buffer[this->lb_buffer_size], - this->lb_file_offset + this->lb_buffer_size, - this->lb_buffer_max - this->lb_buffer_size); - this->lb_compressed_offset - = this->lb_gz_file.get_source_offset(); - if (rc != -1 - && (rc < (this->lb_buffer_max - this->lb_buffer_size))) { + rc = gi->read(this->lb_buffer.end(), + this->lb_file_offset + this->lb_buffer.size(), + this->lb_buffer.available()); + this->lb_compressed_offset = gi->get_source_offset(); + if (rc != -1 && (rc < this->lb_buffer.available())) { this->lb_file_size - = (this->lb_file_offset + this->lb_buffer_size + rc); + = (this->lb_file_offset + this->lb_buffer.size() + rc); } } + // log_debug("old decomp end"); } #ifdef HAVE_BZLIB_H else if (this->lb_bz_file) @@ -553,7 +813,7 @@ line_buffer::fill_range(file_off_t start, ssize_t max_length) } } - seek_to = this->lb_file_offset + this->lb_buffer_size; + seek_to = this->lb_file_offset + this->lb_buffer.size(); while (seek_to > 0) { int count; @@ -564,33 +824,39 @@ line_buffer::fill_range(file_off_t start, ssize_t max_length) seek_to -= count; } rc = BZ2_bzread(bz_file, - &this->lb_buffer[this->lb_buffer_size], - this->lb_buffer_max - this->lb_buffer_size); + this->lb_buffer.end(), + this->lb_buffer.available()); this->lb_compressed_offset = lseek(bzfd, 0, SEEK_SET); BZ2_bzclose(bz_file); - if (rc != -1 - && (rc < (this->lb_buffer_max - this->lb_buffer_size))) { + if (rc != -1 && (rc < (this->lb_buffer.available()))) { this->lb_file_size - = (this->lb_file_offset + this->lb_buffer_size + rc); + = (this->lb_file_offset + this->lb_buffer.size() + rc); } } } #endif else if (this->lb_seekable) { +#if 1 + log_debug("doing pread file_off=%d read_off=%d count=%d", + this->lb_file_offset, + this->lb_file_offset + this->lb_buffer.size(), + this->lb_buffer.available()); +#endif rc = pread(this->lb_fd, - &this->lb_buffer[this->lb_buffer_size], - this->lb_buffer_max - this->lb_buffer_size, - this->lb_file_offset + this->lb_buffer_size); + this->lb_buffer.end(), + this->lb_buffer.available(), + this->lb_file_offset + this->lb_buffer.size()); + // log_debug("pread rc %d", rc); } else { rc = read(this->lb_fd, - &this->lb_buffer[this->lb_buffer_size], - this->lb_buffer_max - this->lb_buffer_size); + this->lb_buffer.end(), + this->lb_buffer.available()); } // XXX For some reason, cygwin is giving us a bogus return value when // up to the end of the file. - if (rc > (this->lb_buffer_max - this->lb_buffer_size)) { + if (rc > (this->lb_buffer.available())) { rc = -1; #ifdef ENODATA errno = ENODATA; @@ -602,13 +868,13 @@ line_buffer::fill_range(file_off_t start, ssize_t max_length) case 0: if (!this->lb_seekable) { this->lb_file_size - = this->lb_file_offset + this->lb_buffer_size; + = this->lb_file_offset + this->lb_buffer.size(); } if (start < (file_off_t) this->lb_file_size) { retval = true; } - if (this->lb_gz_file || this->lb_bz_file) { + if (this->lb_compressed) { /* * For compressed files, increase the buffer size so we * don't have to spend as much time uncompressing the data. 
@@ -635,12 +901,46 @@ line_buffer::fill_range(file_off_t start, ssize_t max_length) break; default: - this->lb_buffer_size += rc; + this->lb_buffer.resize_by(rc); retval = true; break; } - ensure(this->lb_buffer_size <= this->lb_buffer_max); + if (this->lb_seekable && this->lb_buffer.full() + && !this->lb_loader_file_offset) { + // log_debug("loader available2 start=%d", start); + auto last_lf_iter = std::find( + this->lb_buffer.rbegin(), this->lb_buffer.rend(), '\n'); + if (last_lf_iter != this->lb_buffer.rend()) { + auto usable_size + = std::distance(last_lf_iter, this->lb_buffer.rend()); + // log_debug("found linefeed %d", usable_size); + if (!this->lb_alt_buffer) { + // log_debug("allocating new buffer!"); + this->lb_alt_buffer + = auto_buffer::alloc(this->lb_buffer.capacity()); + } + this->lb_alt_buffer->resize(this->lb_buffer.size() + - usable_size); + memcpy(this->lb_alt_buffer->begin(), + this->lb_buffer.at(usable_size), + this->lb_alt_buffer->size()); + this->lb_loader_file_offset + = this->lb_file_offset + usable_size; +#if 0 + log_debug("load offset %d", + this->lb_loader_file_offset.value()); + log_debug("launch loader"); +#endif + auto prom = std::make_shared>(); + this->lb_loader_future = prom->get_future(); + isc::to().send( + [this, prom](auto& ioloop) mutable { + prom->set_value(this->load_next_buffer()); + }); + } + } + ensure(this->lb_buffer.size() <= this->lb_buffer.capacity()); } return retval; @@ -655,26 +955,54 @@ line_buffer::load_next_line(file_range prev_line) require(this->lb_fd != -1); auto offset = prev_line.next_offset(); - ssize_t request_size = this->lb_buffer_size == 0 ? DEFAULT_INCREMENT - : INITIAL_REQUEST_SIZE; + ssize_t request_size = INITIAL_REQUEST_SIZE; retval.li_file_range.fr_offset = offset; + if (this->lb_buffer.empty()) { + this->fill_range(offset, this->lb_buffer.capacity()); + } else if (offset == this->lb_file_offset + this->lb_buffer.size()) { + if (!this->fill_range(offset, INITIAL_REQUEST_SIZE)) { + retval.li_file_range.fr_offset = offset; + retval.li_file_range.fr_size = 0; + if (this->is_pipe()) { + retval.li_partial = !this->is_pipe_closed(); + } else { + retval.li_partial = true; + } + return Ok(retval); + } + } while (!done) { + auto old_retval_size = retval.li_file_range.fr_size; char *line_start, *lf; - this->fill_range(offset, request_size); - /* Find the data in the cache and */ line_start = this->get_range(offset, retval.li_file_range.fr_size); /* ... look for the end-of-line or end-of-file. 
*/ ssize_t utf8_end = -1; -#ifdef HAVE_X86INTRIN_H - if (!validate_utf8_fast( - line_start, retval.li_file_range.fr_size, &utf8_end)) { - retval.li_valid_utf = false; + bool found_in_cache = false; + if (!this->lb_line_starts.empty()) { + auto buffer_offset = offset - this->lb_file_offset; + + auto start_iter = std::lower_bound(this->lb_line_starts.begin(), + this->lb_line_starts.end(), + buffer_offset); + if (start_iter != this->lb_line_starts.end()) { + auto next_line_iter = start_iter + 1; + + // log_debug("found offset %d %d", buffer_offset, *start_iter); + if (next_line_iter != this->lb_line_starts.end()) { + utf8_end = *next_line_iter - 1 - *start_iter; + found_in_cache = true; + } else { + // log_debug("no next iter"); + } + } else { + // log_debug("no buffer_offset found"); + } } -#else - { + + if (!found_in_cache) { const char* msg; int faulty_bytes; @@ -689,18 +1017,24 @@ line_buffer::load_next_line(file_range prev_line) retval.li_valid_utf = false; } } -#endif + if (utf8_end >= 0) { lf = line_start + utf8_end; } else { lf = nullptr; } + auto got_new_data = old_retval_size != retval.li_file_range.fr_size; +#if 0 + log_debug("load next loop %p reqsize %d lsize %d", + lf, + request_size, + retval.li_file_range.fr_size); +#endif if (lf != nullptr || (retval.li_file_range.fr_size >= MAX_LINE_BUFFER_SIZE) || (request_size == MAX_LINE_BUFFER_SIZE) - || ((request_size > retval.li_file_range.fr_size) - && (retval.li_file_range.fr_size > 0) + || (!got_new_data && (!this->is_pipe() || request_size > DEFAULT_INCREMENT))) { if ((lf != nullptr) @@ -756,14 +1090,22 @@ line_buffer::load_next_line(file_range prev_line) request_size += DEFAULT_INCREMENT; } - if (!done && !this->fill_range(offset, request_size)) { + if (!done + && !this->fill_range( + offset, + std::max(request_size, (ssize_t) this->lb_buffer.available()))) + { break; } } - ensure(retval.li_file_range.fr_size <= this->lb_buffer_size); + ensure(retval.li_file_range.fr_size <= this->lb_buffer.size()); ensure(this->invariant()); - +#if 0 + log_debug("got line part %d %d", + retval.li_file_range.fr_offset, + (int) retval.li_partial); +#endif return Ok(retval); } @@ -783,8 +1125,12 @@ line_buffer::read_range(const file_range fr) return Err(std::string("out-of-bounds")); } - if (!this->fill_range(fr.fr_offset, fr.fr_size)) { - return Err(std::string("unable to read file")); + if (!(this->in_range(fr.fr_offset) + && this->in_range(fr.fr_offset + fr.fr_size - 1))) + { + if (!this->fill_range(fr.fr_offset, fr.fr_size)) { + return Err(std::string("unable to read file")); + } } line_start = this->get_range(fr.fr_offset, avail); @@ -794,13 +1140,14 @@ line_buffer::read_range(const file_range fr) } retval.share(this->lb_share_manager, line_start, fr.fr_size); - return Ok(retval); + return Ok(std::move(retval)); } file_range line_buffer::get_available() { - return {this->lb_file_offset, this->lb_buffer_size}; + return {this->lb_file_offset, + static_cast(this->lb_buffer.size())}; } line_buffer::gz_indexed::indexDict::indexDict(const z_stream& s, diff --git a/src/line_buffer.hh b/src/line_buffer.hh index 4415ffb1..7f01fae5 100644 --- a/src/line_buffer.hh +++ b/src/line_buffer.hh @@ -33,6 +33,7 @@ #define line_buffer_hh #include +#include #include #include @@ -45,6 +46,7 @@ #include "base/file_range.hh" #include "base/lnav_log.hh" #include "base/result.h" +#include "safe/safe.h" #include "shared_buffer.hh" struct line_info { @@ -139,7 +141,7 @@ public: /** Construct an empty line_buffer. 
*/ line_buffer(); - line_buffer(line_buffer&& other) = default; + line_buffer(line_buffer&& other) = delete; virtual ~line_buffer(); @@ -177,16 +179,15 @@ public: bool is_compressed() const { - return this->lb_gz_file || this->lb_bz_file; + return this->lb_compressed; } file_off_t get_read_offset(file_off_t off) const { if (this->is_compressed()) { return this->lb_compressed_offset; - } else { - return off; } + return off; } bool is_data_available(file_off_t off, file_off_t stat_size) const @@ -212,9 +213,13 @@ public: bool is_likely_to_flush(file_range prev_line); - void clear() + void flush_at(file_off_t off) { - this->lb_buffer_size = 0; + if (this->in_range(off)) { + this->lb_buffer.resize(off - this->lb_file_offset); + } else { + this->lb_buffer.clear(); + } } /** Release any resources held by this object. */ @@ -224,15 +229,14 @@ public: this->lb_file_offset = 0; this->lb_file_size = (ssize_t) -1; - this->lb_buffer_size = 0; + this->lb_buffer.resize(0); this->lb_last_line_offset = -1; } /** Check the invariants for this object. */ bool invariant() const { - require(this->lb_buffer != nullptr); - require(this->lb_buffer_size <= this->lb_buffer_max); + require(this->lb_buffer.size() <= this->lb_buffer.capacity()); return true; } @@ -245,7 +249,7 @@ private: bool in_range(file_off_t off) const { return this->lb_file_offset <= off - && off < (this->lb_file_offset + this->lb_buffer_size); + && off < (this->lb_file_offset + this->lb_buffer.size()); } void resize_buffer(size_t new_max); @@ -284,43 +288,56 @@ private: * @return A pointer to the start of the cached data in the internal * buffer. */ - char* get_range(file_off_t start, file_ssize_t& avail_out) const + char* get_range(file_off_t start, file_ssize_t& avail_out) { auto buffer_offset = start - this->lb_file_offset; char* retval; require(buffer_offset >= 0); - require(this->lb_buffer_size >= buffer_offset); + require(this->lb_buffer.size() >= buffer_offset); - retval = &this->lb_buffer[buffer_offset]; - avail_out = this->lb_buffer_size - buffer_offset; + retval = this->lb_buffer.at(buffer_offset); + avail_out = this->lb_buffer.size() - buffer_offset; return retval; } + bool load_next_buffer(); + + using safe_gz_indexed = safe::Safe; + shared_buffer lb_share_manager; auto_fd lb_fd; /*< The file to read data from. */ - gz_indexed lb_gz_file; /*< File reader for gzipped files. */ - bool lb_bz_file; /*< Flag set for bzip2 compressed files. */ - file_off_t lb_compressed_offset; /*< The offset into the compressed file. */ - - auto_mem lb_buffer; /*< The internal buffer where data is cached */ - - file_ssize_t lb_file_size; /*< - * The size of the file. When lb_fd refers to - * a pipe, this is set to the amount of data - * read from the pipe when EOF is reached. - */ - file_off_t lb_file_offset; /*< - * Data cached in the buffer comes from this - * offset in the file. - */ - time_t lb_file_time; - ssize_t lb_buffer_size; /*< The amount of cached data in the buffer. */ - ssize_t lb_buffer_max; /*< The amount of allocated memory for the - * buffer. */ - bool lb_seekable; /*< Flag set for seekable file descriptors. */ - file_off_t lb_last_line_offset; /*< */ + safe_gz_indexed lb_gz_file; /*< File reader for gzipped files. */ + bool lb_bz_file{false}; /*< Flag set for bzip2 compressed files. 
*/
+
+    auto_buffer lb_buffer{auto_buffer::alloc(DEFAULT_LINE_BUFFER_SIZE)};
+    nonstd::optional<auto_buffer> lb_alt_buffer;
+    std::vector<uint32_t> lb_alt_line_starts;
+    std::vector<bool> lb_alt_line_is_utf;
+    std::future<bool> lb_loader_future;
+    nonstd::optional<file_off_t> lb_loader_file_offset;
+
+    file_off_t lb_compressed_offset{
+        0}; /*< The offset into the compressed file. */
+    file_ssize_t lb_file_size{
+        -1}; /*<
+              * The size of the file.  When lb_fd refers to
+              * a pipe, this is set to the amount of data
+              * read from the pipe when EOF is reached.
+              */
+    file_off_t lb_file_offset{0}; /*<
+                                   * Data cached in the buffer comes from this
+                                   * offset in the file.
+                                   */
+    time_t lb_file_time{0};
+    bool lb_seekable{false}; /*< Flag set for seekable file descriptors. */
+    bool lb_compressed{false};
+    file_off_t lb_last_line_offset{-1}; /*< */
+
+    std::vector<uint32_t> lb_line_starts;
+    std::vector<bool> lb_line_is_utf;
 };
+
 #endif
diff --git a/src/lnav_commands.cc b/src/lnav_commands.cc
index d838b2bc..8b3d0bfc 100644
--- a/src/lnav_commands.cc
+++ b/src/lnav_commands.cc
@@ -1365,8 +1365,9 @@ com_save_to(exec_context& ec,
         lnav_data.ld_preview_status_source.get_description().set_value(
             "First lines of file: %s", split_args[0].c_str());
     } else {
-        retval = "info: Wrote " + std::to_string(line_count) + " rows to "
-            + split_args[0];
+        retval = fmt::format(FMT_STRING("info: Wrote {:L} rows to {}"),
+                             line_count,
+                             split_args[0]);
     }
 
     if (toclose != nullptr) {
         closer(toclose);
     }
diff --git a/src/log_format.cc b/src/log_format.cc
index 72731091..11027a03 100644
--- a/src/log_format.cc
+++ b/src/log_format.cc
@@ -409,7 +409,10 @@ log_format::check_for_new_year(std::vector<logline>& dst,
  * XXX This needs some cleanup.
  */
 struct json_log_userdata {
-    json_log_userdata(shared_buffer_ref& sbr) : jlu_shared_buffer(sbr) {}
+    json_log_userdata(shared_buffer_ref& sbr, scan_batch_context* sbc)
+        : jlu_shared_buffer(sbr), jlu_batch_context(sbc)
+    {
+    }
 
     external_log_format* jlu_format{nullptr};
     const logline* jlu_line{nullptr};
@@ -420,6 +423,7 @@ struct json_log_userdata {
     size_t jlu_line_size{0};
     size_t jlu_sub_start{0};
     shared_buffer_ref& jlu_shared_buffer;
+    scan_batch_context* jlu_batch_context;
 };
 
 static int read_json_field(yajlpp_parse_context* ypc,
@@ -472,8 +476,8 @@ read_json_int(yajlpp_parse_context* ypc, long long val)
         pcre_input pi(level_buf);
         pcre_context::capture_t level_cap = {0, (int) strlen(level_buf)};
 
-        jlu->jlu_base_line->set_level(
-            jlu->jlu_format->convert_level(pi, &level_cap));
+        jlu->jlu_base_line->set_level(jlu->jlu_format->convert_level(
+            pi, &level_cap, jlu->jlu_batch_context));
     } else {
         std::vector<std::pair<int64_t, log_level_t>>::iterator iter;
@@ -690,7 +694,7 @@ external_log_format::scan(logfile& lf,
     yajlpp_parse_context& ypc = *(this->jlf_parse_context);
     logline ll(li.li_file_range.fr_offset, 0, 0, LEVEL_INFO);
     yajl_handle handle = this->jlf_yajl_handle.get();
-    json_log_userdata jlu(sbr);
+    json_log_userdata jlu(sbr, &sbc);
 
     if (!this->lf_specialized && dst.size() >= 3) {
         return log_format::SCAN_NO_MATCH;
@@ -842,7 +846,7 @@ external_log_format::scan(logfile& lf,
             }
         }
 
-        auto level = this->convert_level(pi, level_cap);
+        auto level = this->convert_level(pi, level_cap, &sbc);
 
         this->lf_timestamp_flags = log_time_tm.et_flags;
@@ -901,11 +905,13 @@ external_log_format::scan(logfile& lf,
                     pattern& mod_pat = *mod_elf->elf_pattern_order[mod_pat_index];
 
-                    if (mod_pat.p_pcre->match(mod_pc, mod_pi)) {
+                    if (mod_pat.p_pcre->match(
+                            mod_pc, mod_pi, PCRE_NO_UTF8_CHECK)) {
                         auto* mod_level_cap
                             = mod_pc[mod_pat.p_level_field_index];
 
                        level
= mod_elf->convert_level( + mod_pi, mod_level_cap, &sbc); } } } @@ -974,6 +980,7 @@ external_log_format::scan(logfile& lf, if (this->lf_specialized && !this->lf_multiline) { auto& last_line = dst.back(); + log_debug("invalid line %d %d", dst.size(), li.li_file_range.fr_offset); dst.emplace_back(li.li_file_range.fr_offset, last_line.get_timeval(), log_level_t::LEVEL_INVALID); @@ -1246,19 +1253,20 @@ read_json_field(yajlpp_parse_context* ypc, const unsigned char* str, size_t len) pcre_context_static<30> pc; pcre_input pi(field_name); - if (jlu->jlu_format->elf_level_pointer.match(pc, pi)) { + if (jlu->jlu_format->elf_level_pointer.match( + pc, pi, PCRE_NO_UTF8_CHECK)) { pcre_input pi_level((const char*) str, 0, len); pcre_context::capture_t level_cap = {0, (int) len}; - jlu->jlu_base_line->set_level( - jlu->jlu_format->convert_level(pi_level, &level_cap)); + jlu->jlu_base_line->set_level(jlu->jlu_format->convert_level( + pi_level, &level_cap, jlu->jlu_batch_context)); } } else if (jlu->jlu_format->elf_level_field == field_name) { pcre_input pi((const char*) str, 0, len); pcre_context::capture_t level_cap = {0, (int) len}; - jlu->jlu_base_line->set_level( - jlu->jlu_format->convert_level(pi, &level_cap)); + jlu->jlu_base_line->set_level(jlu->jlu_format->convert_level( + pi, &level_cap, jlu->jlu_batch_context)); } else if (jlu->jlu_format->elf_opid_field == field_name) { uint8_t opid = hash_str((const char*) str, len); jlu->jlu_base_line->set_opid(opid); @@ -1359,7 +1367,7 @@ external_log_format::get_subline(const logline& ll, { yajlpp_parse_context& ypc = *(this->jlf_parse_context); yajl_handle handle = this->jlf_yajl_handle.get(); - json_log_userdata jlu(sbr); + json_log_userdata jlu(sbr, nullptr); this->jlf_share_manager.invalidate_refs(); this->jlf_cached_line.clear(); @@ -2025,7 +2033,7 @@ external_log_format::build(std::vector& errors) .append(" property"))); } - log_level_t level = this->convert_level(pi, level_cap); + log_level_t level = this->convert_level(pi, level_cap, nullptr); if (elf_sample.s_level != LEVEL_UNKNOWN && elf_sample.s_level != level) { @@ -2565,12 +2573,37 @@ external_log_format::value_line_count(const intern_string_t ist, } log_level_t -external_log_format::convert_level( - const pcre_input& pi, const pcre_context::capture_t* level_cap) const +external_log_format::convert_level(const pcre_input& pi, + const pcre_context::capture_t* level_cap, + scan_batch_context* sbc) const { log_level_t retval = LEVEL_INFO; if (level_cap != nullptr && level_cap->is_valid()) { + if (sbc != nullptr && sbc->sbc_cached_level_count > 0) { + auto sf = pi.get_string_fragment(level_cap); + auto cached_level_iter + = std::find(std::begin(sbc->sbc_cached_level_strings), + std::begin(sbc->sbc_cached_level_strings) + + sbc->sbc_cached_level_count, + sf); + if (cached_level_iter + != std::begin(sbc->sbc_cached_level_strings) + + sbc->sbc_cached_level_count) + { + auto cache_index + = std::distance(std::begin(sbc->sbc_cached_level_strings), + cached_level_iter); + if (cache_index != 0) { + std::swap(sbc->sbc_cached_level_strings[cache_index], + sbc->sbc_cached_level_strings[0]); + std::swap(sbc->sbc_cached_level_values[cache_index], + sbc->sbc_cached_level_values[0]); + } + return sbc->sbc_cached_level_values[0]; + } + } + pcre_context_static<128> pc_level; pcre_input pi_level( pi.get_substr_start(level_cap), 0, level_cap->length()); @@ -2579,13 +2612,28 @@ external_log_format::convert_level( retval = string2level(pi_level.get_string(), level_cap->length()); } else { for (const auto& 
elf_level_pattern : this->elf_level_patterns) {
-                if (elf_level_pattern.second.lp_pcre->match(pc_level, pi_level))
+                if (elf_level_pattern.second.lp_pcre->match(
+                        pc_level, pi_level, PCRE_NO_UTF8_CHECK))
                 {
                     retval = elf_level_pattern.first;
                     break;
                 }
             }
         }
+
+        if (sbc != nullptr && level_cap->length() < 10) {
+            size_t cache_index;
+
+            if (sbc->sbc_cached_level_count == 4) {
+                cache_index = sbc->sbc_cached_level_count - 1;
+            } else {
+                cache_index = sbc->sbc_cached_level_count;
+                sbc->sbc_cached_level_count += 1;
+            }
+            sbc->sbc_cached_level_strings[cache_index]
+                = std::string(pi_level.get_string(), pi_level.pi_length);
+            sbc->sbc_cached_level_values[cache_index] = retval;
+        }
     }
 
     return retval;
diff --git a/src/log_format_ext.hh b/src/log_format_ext.hh
index 04015c79..7286d86f 100644
--- a/src/log_format_ext.hh
+++ b/src/log_format_ext.hh
@@ -314,7 +314,8 @@ public:
     }
 
     log_level_t convert_level(const pcre_input& pi,
-                              const pcre_context::capture_t* level_cap) const;
+                              const pcre_context::capture_t* level_cap,
+                              scan_batch_context* sbc) const;
 
     using mod_map_t = std::map<intern_string_t, module_format>;
     static mod_map_t MODULE_FORMATS;
diff --git a/src/log_format_fwd.hh b/src/log_format_fwd.hh
index 51e8adb0..bffc7f74 100644
--- a/src/log_format_fwd.hh
+++ b/src/log_format_fwd.hh
@@ -32,8 +32,6 @@
 #ifndef lnav_log_format_fwd_hh
 #define lnav_log_format_fwd_hh
 
-#include <unordered_map>
-
 #include <sys/time.h>
 
 #include "ArenaAlloc/arenaalloc.h"
@@ -42,6 +40,7 @@
 #include "byte_array.hh"
 #include "log_level.hh"
 #include "ptimec.hh"
+#include "robin_hood/robin_hood.h"
 
 class log_format;
 
@@ -50,16 +49,17 @@ struct opid_time_range {
     struct timeval otr_end;
 };
 
-using log_opid_map = std::unordered_map<
-    string_fragment,
-    opid_time_range,
-    frag_hasher,
-    std::equal_to<string_fragment>,
-    ArenaAlloc::Alloc<std::pair<const string_fragment, opid_time_range>>>;
+using log_opid_map = robin_hood::unordered_map<string_fragment,
+                                               opid_time_range,
+                                               frag_hasher,
+                                               std::equal_to<string_fragment>>;
 
 struct scan_batch_context {
     ArenaAlloc::Alloc<char>& sbc_allocator;
     log_opid_map sbc_opids;
+    std::string sbc_cached_level_strings[4];
+    log_level_t sbc_cached_level_values[4];
+    size_t sbc_cached_level_count{0};
 };
 
 /**
diff --git a/src/logfile.cc b/src/logfile.cc
index 174143cf..1352140b 100644
--- a/src/logfile.cc
+++ b/src/logfile.cc
@@ -127,6 +127,7 @@ logfile::open(std::string filename, logfile_open_options& loo)
 logfile::logfile(std::string filename, logfile_open_options& loo)
     : lf_filename(std::move(filename)), lf_options(std::move(loo))
 {
+    this->lf_opids.writeAccess()->reserve(64);
 }
 
 logfile::~logfile() {}
@@ -414,12 +415,13 @@ logfile::rebuild_index(nonstd::optional deadline)
             this->lf_index.pop_back();
             rollback_size += 1;
 
-            this->lf_line_buffer.clear();
             if (!this->lf_index.empty()) {
                 auto last_line = this->lf_index.end();
                 --last_line;
                 auto check_line_off = last_line->get_offset();
                 auto last_length = ssize_t(this->line_length(last_line, false));
+                log_debug("flushing at %d", check_line_off);
+                this->lf_line_buffer.flush_at(check_line_off);
 
                 auto read_result = this->lf_line_buffer.read_range(
                     {check_line_off, last_length});
@@ -463,6 +465,7 @@
                   "loading file... 
%s:%d", this->lf_filename.c_str(), begin_size); } scan_batch_context sbc{this->lf_allocator}; + sbc.sbc_opids.reserve(32); auto prev_range = file_range{off}; while (limit > 0) { auto load_result = this->lf_line_buffer.load_next_line(prev_range); @@ -525,7 +528,8 @@ logfile::rebuild_index(nonstd::optional deadline) return rebuild_result_t::INVALID; } - auto sbr = read_result.unwrap().rtrim(is_line_ending); + auto sbr = read_result.unwrap(); + sbr.rtrim(is_line_ending); this->lf_longest_line = std::max(this->lf_longest_line, sbr.length()); this->lf_partial_line = li.li_partial; @@ -622,6 +626,7 @@ logfile::rebuild_index(nonstd::optional deadline) this->lf_index_size = prev_range.next_offset(); this->lf_stat = st; + log_debug("batch opid count %d", sbc.sbc_opids.size()); { safe::WriteAccess writable_opid_map( this->lf_opids); diff --git a/src/logfile.hh b/src/logfile.hh index 217edbbe..ba97eba3 100644 --- a/src/logfile.hh +++ b/src/logfile.hh @@ -34,7 +34,6 @@ #define logfile_hh #include -#include #include #include @@ -420,7 +419,7 @@ private: safe_notes lf_notes; safe_opid_map lf_opids; size_t lf_watch_count{0}; - ArenaAlloc::Alloc lf_allocator; + ArenaAlloc::Alloc lf_allocator{64 * 1024}; nonstd::optional> lf_next_line_cache; }; diff --git a/src/ptimec.c b/src/ptimec.c index 78c42a97..45059d6a 100644 --- a/src/ptimec.c +++ b/src/ptimec.c @@ -63,10 +63,12 @@ int main(int argc, char *argv[]) const char* arg = argv[lpc]; printf( + "// %s\n" "bool ptime_f%d(struct exttm *dst, const char *str, off_t &off, " "ssize_t len) {\n" " dst->et_flags = 0;\n" " // log_debug(\"ptime_f%d\");\n", + arg, lpc, lpc); for (int index = 0; arg[index]; arg++) { diff --git a/src/shared_buffer.cc b/src/shared_buffer.cc index 4dffef4e..76233100 100644 --- a/src/shared_buffer.cc +++ b/src/shared_buffer.cc @@ -93,11 +93,13 @@ shared_buffer_ref::shared_buffer_ref(shared_buffer_ref&& other) noexcept this->sb_data = nullptr; this->sb_length = 0; } else if (other.sb_owner != nullptr) { - other.sb_owner->add_ref(*this); - this->sb_owner = other.sb_owner; - this->sb_data = other.sb_data; - this->sb_length = other.sb_length; - other.disown(); + auto owner_ref_iter = std::find(other.sb_owner->sb_refs.begin(), + other.sb_owner->sb_refs.end(), + &other); + *owner_ref_iter = this; + this->sb_owner = std::exchange(other.sb_owner, nullptr); + this->sb_data = std::exchange(other.sb_data, nullptr); + this->sb_length = std::exchange(other.sb_length, 0); } else { this->sb_owner = nullptr; this->sb_data = other.sb_data; diff --git a/src/string-extension-functions.cc b/src/string-extension-functions.cc index 8774cac0..14c0398c 100644 --- a/src/string-extension-functions.cc +++ b/src/string-extension-functions.cc @@ -387,10 +387,8 @@ sql_gunzip(sqlite3_value* val) auto len = sqlite3_value_bytes(val); if (!lnav::gzip::is_gzipped((const char*) buffer, len)) { - auto retval = auto_buffer::alloc(len); - - memcpy(retval.in(), buffer, len); - return blob_auto_buffer{std::move(retval)}; + return blob_auto_buffer{ + auto_buffer::from((const char*) buffer, len)}; } auto res = lnav::gzip::uncompress("", buffer, len); diff --git a/src/textfile_sub_source.hh b/src/textfile_sub_source.hh index 8572a0e0..e4750386 100644 --- a/src/textfile_sub_source.hh +++ b/src/textfile_sub_source.hh @@ -31,6 +31,7 @@ #define textfile_sub_source_hh #include +#include #include "filter_observer.hh" #include "logfile.hh" diff --git a/src/third-party/robin_hood/robin_hood.h b/src/third-party/robin_hood/robin_hood.h new file mode 100644 index 00000000..0af031f5 --- 
/dev/null +++ b/src/third-party/robin_hood/robin_hood.h @@ -0,0 +1,2544 @@ +// ______ _____ ______ _________ +// ______________ ___ /_ ___(_)_______ ___ /_ ______ ______ ______ / +// __ ___/_ __ \__ __ \__ / __ __ \ __ __ \_ __ \_ __ \_ __ / +// _ / / /_/ /_ /_/ /_ / _ / / / _ / / // /_/ // /_/ // /_/ / +// /_/ \____/ /_.___/ /_/ /_/ /_/ ________/_/ /_/ \____/ \____/ \__,_/ +// _/_____/ +// +// Fast & memory efficient hashtable based on robin hood hashing for C++11/14/17/20 +// https://github.com/martinus/robin-hood-hashing +// +// Licensed under the MIT License . +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2021 Martin Ankerl +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#ifndef ROBIN_HOOD_H_INCLUDED +#define ROBIN_HOOD_H_INCLUDED + +// see https://semver.org/ +#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes +#define ROBIN_HOOD_VERSION_MINOR 11 // for adding functionality in a backwards-compatible manner +#define ROBIN_HOOD_VERSION_PATCH 5 // for backwards-compatible bug fixes + +#include +#include +#include +#include +#include +#include // only to support hash of smart pointers +#include +#include +#include +#include +#if __cplusplus >= 201703L +# include +#endif + +// #define ROBIN_HOOD_LOG_ENABLED +#ifdef ROBIN_HOOD_LOG_ENABLED +# include +# define ROBIN_HOOD_LOG(...) \ + std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; +#else +# define ROBIN_HOOD_LOG(x) +#endif + +// #define ROBIN_HOOD_TRACE_ENABLED +#ifdef ROBIN_HOOD_TRACE_ENABLED +# include +# define ROBIN_HOOD_TRACE(...) \ + std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; +#else +# define ROBIN_HOOD_TRACE(x) +#endif + +// #define ROBIN_HOOD_COUNT_ENABLED +#ifdef ROBIN_HOOD_COUNT_ENABLED +# include +# define ROBIN_HOOD_COUNT(x) ++counts().x; +namespace robin_hood { +struct Counts { + uint64_t shiftUp{}; + uint64_t shiftDown{}; +}; +inline std::ostream& operator<<(std::ostream& os, Counts const& c) { + return os << c.shiftUp << " shiftUp" << std::endl << c.shiftDown << " shiftDown" << std::endl; +} + +static Counts& counts() { + static Counts counts{}; + return counts; +} +} // namespace robin_hood +#else +# define ROBIN_HOOD_COUNT(x) +#endif + +// all non-argument macros should use this facility. 
See +// https://www.fluentcpp.com/2019/05/28/better-macros-better-flags/ +#define ROBIN_HOOD(x) ROBIN_HOOD_PRIVATE_DEFINITION_##x() + +// mark unused members with this macro +#define ROBIN_HOOD_UNUSED(identifier) + +// bitness +#if SIZE_MAX == UINT32_MAX +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 32 +#elif SIZE_MAX == UINT64_MAX +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 64 +#else +# error Unsupported bitness +#endif + +// endianess +#ifdef _MSC_VER +# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() 1 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() 0 +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() \ + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#endif + +// inline +#ifdef _MSC_VER +# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __declspec(noinline) +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __attribute__((noinline)) +#endif + +// exceptions +#if !defined(__cpp_exceptions) && !defined(__EXCEPTIONS) && !defined(_CPPUNWIND) +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 0 +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 1 +#endif + +// count leading/trailing bits +#if !defined(ROBIN_HOOD_DISABLE_INTRINSICS) +# ifdef _MSC_VER +# if ROBIN_HOOD(BITNESS) == 32 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64 +# endif +# include +# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD)) +# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \ + [](size_t mask) noexcept -> int { \ + unsigned long index; \ + return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast(index) \ + : ROBIN_HOOD(BITNESS); \ + }(x) +# else +# if ROBIN_HOOD(BITNESS) == 32 +# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl +# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll +# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll +# endif +# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS)) +# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS)) +# endif +#endif + +// fallthrough +#ifndef __has_cpp_attribute // For backwards compatibility +# define __has_cpp_attribute(x) 0 +#endif +#if __has_cpp_attribute(clang::fallthrough) +# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[clang::fallthrough]] +#elif __has_cpp_attribute(gnu::fallthrough) +# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[gnu::fallthrough]] +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() +#endif + +// likely/unlikely +#ifdef _MSC_VER +# define ROBIN_HOOD_LIKELY(condition) condition +# define ROBIN_HOOD_UNLIKELY(condition) condition +#else +# define ROBIN_HOOD_LIKELY(condition) __builtin_expect(condition, 1) +# define ROBIN_HOOD_UNLIKELY(condition) __builtin_expect(condition, 0) +#endif + +// detect if native wchar_t type is availiable in MSVC +#ifdef _MSC_VER +# ifdef _NATIVE_WCHAR_T_DEFINED +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 0 +# endif +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 +#endif + +// detect if MSVC supports the pair(std::piecewise_construct_t,...) 
+// detect if MSVC supports the pair(std::piecewise_construct_t,...) constructor being constexpr
+#ifdef _MSC_VER
+#    if _MSC_VER <= 1900
+#        define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 1
+#    else
+#        define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0
+#    endif
+#else
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0
+#endif
+
+// workaround missing "is_trivially_copyable" in g++ < 5.0
+// See https://stackoverflow.com/a/31798726/48181
+#if defined(__GNUC__) && __GNUC__ < 5
+#    define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__)
+#else
+#    define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value
+#endif
+
+// helpers for C++ versions, see https://gcc.gnu.org/onlinedocs/cpp/Standard-Predefined-Macros.html
+#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX() __cplusplus
+#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX98() 199711L
+#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX11() 201103L
+#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX14() 201402L
+#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX17() 201703L
+
+#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17)
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() [[nodiscard]]
+#else
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD()
+#endif
+
+namespace robin_hood {
+
+#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14)
+#    define ROBIN_HOOD_STD std
+#else
+
+// c++11 compatibility layer
+namespace ROBIN_HOOD_STD {
+template <class T>
+struct alignment_of
+    : std::integral_constant<std::size_t, alignof(typename std::remove_all_extents<T>::type)> {};
+
+template <class T, T... Ints>
+class integer_sequence {
+public:
+    using value_type = T;
+    static_assert(std::is_integral<value_type>::value, "not integral type");
+    static constexpr std::size_t size() noexcept {
+        return sizeof...(Ints);
+    }
+};
+template <std::size_t... Inds>
+using index_sequence = integer_sequence<std::size_t, Inds...>;
+
+namespace detail_ {
+template <class T, T Begin, T End, bool>
+struct IntSeqImpl {
+    using TValue = T;
+    static_assert(std::is_integral<TValue>::value, "not integral type");
+    static_assert(Begin >= 0 && Begin < End, "unexpected argument (Begin<0 || Begin<=End)");
+
+    template <class, class>
+    struct IntSeqCombiner;
+
+    template <TValue... Inds0, TValue... Inds1>
+    struct IntSeqCombiner<integer_sequence<TValue, Inds0...>, integer_sequence<TValue, Inds1...>> {
+        using TResult = integer_sequence<TValue, Inds0..., Inds1...>;
+    };
+
+    using TResult =
+        typename IntSeqCombiner<typename IntSeqImpl<TValue, Begin, Begin + (End - Begin) / 2,
+                                                    (End - Begin) / 2 == 1>::TResult,
+                                typename IntSeqImpl<TValue, Begin + (End - Begin) / 2, End,
+                                                    (End - Begin + 1) / 2 == 1>::TResult>::TResult;
+};
+
+template <class T, T Begin>
+struct IntSeqImpl<T, Begin, Begin, false> {
+    using TValue = T;
+    static_assert(std::is_integral<TValue>::value, "not integral type");
+    static_assert(Begin >= 0, "unexpected argument (Begin<0)");
+    using TResult = integer_sequence<TValue>;
+};
+
+template <class T, T Begin, T End>
+struct IntSeqImpl<T, Begin, End, true> {
+    using TValue = T;
+    static_assert(std::is_integral<TValue>::value, "not integral type");
+    static_assert(Begin >= 0, "unexpected argument (Begin<0)");
+    using TResult = integer_sequence<TValue, Begin>;
+};
+} // namespace detail_
+
+template <class T, T N>
+using make_integer_sequence = typename detail_::IntSeqImpl<T, 0, N, (N - 0) == 1>::TResult;
+
+template <std::size_t N>
+using make_index_sequence = make_integer_sequence<std::size_t, N>;
+
+template <class... T>
+using index_sequence_for = make_index_sequence<sizeof...(T)>;
+
+} // namespace ROBIN_HOOD_STD
+
+#endif
+
+namespace detail {
+
+// make sure we static_cast to the correct type for hash_int
+#if ROBIN_HOOD(BITNESS) == 64
+using SizeT = uint64_t;
+#else
+using SizeT = uint32_t;
+#endif
+
+template <typename T>
+T rotr(T x, unsigned k) {
+    return (x >> k) | (x << (8U * sizeof(T) - k));
+}
+
+// This cast gets rid of warnings like "cast from 'uint8_t*' {aka 'unsigned char*'} to
+// 'uint64_t*' {aka 'long unsigned int*'} increases required alignment of target type". Use with
+// care!
+template <typename T>
+inline T reinterpret_cast_no_cast_align_warning(void* ptr) noexcept {
+    return reinterpret_cast<T>(ptr);
+}
+
+template <typename T>
+inline T reinterpret_cast_no_cast_align_warning(void const* ptr) noexcept {
+    return reinterpret_cast<T>(ptr);
+}
+
+// make sure this is not inlined as it is slow and dramatically enlarges code, thus making other
+// inlinings more difficult. Throws are also generally the slow path.
+template <typename E, typename... Args>
+[[noreturn]] ROBIN_HOOD(NOINLINE)
+#if ROBIN_HOOD(HAS_EXCEPTIONS)
+    void doThrow(Args&&... args) {
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay)
+    throw E(std::forward<Args>(args)...);
+}
+#else
+    void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) {
+    abort();
+}
+#endif
+
+template <typename E, typename T, typename... Args>
+T* assertNotNull(T* t, Args&&... args) {
+    if (ROBIN_HOOD_UNLIKELY(nullptr == t)) {
+        doThrow<E>(std::forward<Args>(args)...);
+    }
+    return t;
+}
+
+template <typename T>
+inline T unaligned_load(void const* ptr) noexcept {
+    // using memcpy so we don't get into unaligned load problems.
+    // compiler should optimize this very well anyways.
+    T t;
+    std::memcpy(&t, ptr, sizeof(T));
+    return t;
+}
+
+// Allocates bulks of memory for objects of type T. This deallocates the memory in the destructor,
+// and keeps a linked list of the allocated memory around. Overhead per allocation is the size of a
+// pointer.
+template <typename T, size_t MinNumAllocs = 4, size_t MaxNumAllocs = 256>
+class BulkPoolAllocator {
+public:
+    BulkPoolAllocator() noexcept = default;
+
+    // does not copy anything, just creates a new allocator.
+    BulkPoolAllocator(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept
+        : mHead(nullptr)
+        , mListForFree(nullptr) {}
+
+    BulkPoolAllocator(BulkPoolAllocator&& o) noexcept
+        : mHead(o.mHead)
+        , mListForFree(o.mListForFree) {
+        o.mListForFree = nullptr;
+        o.mHead = nullptr;
+    }
+
+    BulkPoolAllocator& operator=(BulkPoolAllocator&& o) noexcept {
+        reset();
+        mHead = o.mHead;
+        mListForFree = o.mListForFree;
+        o.mListForFree = nullptr;
+        o.mHead = nullptr;
+        return *this;
+    }
+
+    BulkPoolAllocator&
+    // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp)
+    operator=(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept {
+        // does not do anything
+        return *this;
+    }
+
+    ~BulkPoolAllocator() noexcept {
+        reset();
+    }
+
+    // Deallocates all allocated memory.
+    void reset() noexcept {
+        while (mListForFree) {
+            T* tmp = *mListForFree;
+            ROBIN_HOOD_LOG("std::free")
+            std::free(mListForFree);
+            mListForFree = reinterpret_cast_no_cast_align_warning<T**>(tmp);
+        }
+        mHead = nullptr;
+    }
+
+    // allocates, but does NOT initialize. Use in-place new constructor, e.g.
+    //   T* obj = pool.allocate();
+    //   ::new (static_cast<void*>(obj)) T();
+    T* allocate() {
+        T* tmp = mHead;
+        if (!tmp) {
+            tmp = performAllocation();
+        }
+
+        mHead = *reinterpret_cast_no_cast_align_warning<T**>(tmp);
+        return tmp;
+    }
+
+    // does not actually deallocate but puts it in store.
+    // make sure you have already called the destructor! e.g. with
+    //   obj->~T();
+    //   pool.deallocate(obj);
+    void deallocate(T* obj) noexcept {
+        *reinterpret_cast_no_cast_align_warning<T**>(obj) = mHead;
+        mHead = obj;
+    }
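(Editor's aside: the allocate()/deallocate() contract above, raw storage plus caller-side placement new and manual destructor calls, is easy to get wrong, so here is the protocol as one runnable sketch. BulkPoolAllocator is an internal detail of this header and "Widget" is a made-up type; this only illustrates the call order documented in the comments above.)

    #include <new>
    #include "robin_hood.h"

    struct Widget {
        int value{42};
    };

    int main() {
        robin_hood::detail::BulkPoolAllocator<Widget> pool;

        Widget* w = pool.allocate();            // raw, uninitialized slot
        ::new (static_cast<void*>(w)) Widget{}; // construct in place

        w->~Widget();       // destroy first...
        pool.deallocate(w); // ...then return the slot to the pool's free list
    }                       // ~BulkPoolAllocator frees the underlying blocks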
+    // Adds an already allocated block of memory to the allocator. This allocator is from now on
+    // responsible for freeing the data (with free()). If the provided data is not large enough to
+    // make use of, it is immediately freed. Otherwise it is reused and freed in the destructor.
+    void addOrFree(void* ptr, const size_t numBytes) noexcept {
+        // calculate number of available elements in ptr
+        if (numBytes < ALIGNMENT + ALIGNED_SIZE) {
+            // not enough data for at least one element. Free and return.
+            ROBIN_HOOD_LOG("std::free")
+            std::free(ptr);
+        } else {
+            ROBIN_HOOD_LOG("add to buffer")
+            add(ptr, numBytes);
+        }
+    }
+
+    void swap(BulkPoolAllocator<T, MinNumAllocs, MaxNumAllocs>& other) noexcept {
+        using std::swap;
+        swap(mHead, other.mHead);
+        swap(mListForFree, other.mListForFree);
+    }
+
+private:
+    // iterates the list of allocated memory to calculate how many to alloc next.
+    // Recalculating this each time saves us a size_t member.
+    // This ignores the fact that memory blocks might have been added manually with addOrFree. In
+    // practice, this should not matter much.
+    ROBIN_HOOD(NODISCARD) size_t calcNumElementsToAlloc() const noexcept {
+        auto tmp = mListForFree;
+        size_t numAllocs = MinNumAllocs;
+
+        while (numAllocs * 2 <= MaxNumAllocs && tmp) {
+            auto x = reinterpret_cast<T***>(tmp);
+            tmp = *x;
+            numAllocs *= 2;
+        }
+
+        return numAllocs;
+    }
+
+    // WARNING: Underflow if numBytes < ALIGNMENT! This is guarded in addOrFree().
+    void add(void* ptr, const size_t numBytes) noexcept {
+        const size_t numElements = (numBytes - ALIGNMENT) / ALIGNED_SIZE;
+
+        auto data = reinterpret_cast<T**>(ptr);
+
+        // link free list
+        auto x = reinterpret_cast<T***>(data);
+        *x = mListForFree;
+        mListForFree = data;
+
+        // create linked list for newly allocated data
+        auto* const headT =
+            reinterpret_cast_no_cast_align_warning<T*>(reinterpret_cast<char*>(ptr) + ALIGNMENT);
+
+        auto* const head = reinterpret_cast<char*>(headT);
+
+        // Visual Studio compiler automatically unrolls this loop, which is pretty cool
+        for (size_t i = 0; i < numElements; ++i) {
+            *reinterpret_cast_no_cast_align_warning<char**>(head + i * ALIGNED_SIZE) =
+                head + (i + 1) * ALIGNED_SIZE;
+        }
+
+        // last one points to 0
+        *reinterpret_cast_no_cast_align_warning<T**>(head + (numElements - 1) * ALIGNED_SIZE) =
+            mHead;
+        mHead = headT;
+    }
+
+    // Called when no memory is available (mHead == 0).
+    // Don't inline this slow path.
+    ROBIN_HOOD(NOINLINE) T* performAllocation() {
+        size_t const numElementsToAlloc = calcNumElementsToAlloc();
+
+        // alloc new memory: [prev |T, T, ... T]
+        size_t const bytes = ALIGNMENT + ALIGNED_SIZE * numElementsToAlloc;
+        ROBIN_HOOD_LOG("std::malloc " << bytes << " = " << ALIGNMENT << " + " << ALIGNED_SIZE
+                                      << " * " << numElementsToAlloc)
+        add(assertNotNull<std::bad_alloc>(std::malloc(bytes)), bytes);
+        return mHead;
+    }
+
+    // enforce byte alignment of the T's
+#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14)
+    static constexpr size_t ALIGNMENT =
+        (std::max)(std::alignment_of<T>::value, std::alignment_of<T*>::value);
+#else
+    static const size_t ALIGNMENT =
+        (ROBIN_HOOD_STD::alignment_of<T>::value > ROBIN_HOOD_STD::alignment_of<T*>::value)
+            ? ROBIN_HOOD_STD::alignment_of<T>::value
+            : +ROBIN_HOOD_STD::alignment_of<T*>::value; // the + is for a workaround
+#endif
+
+    static constexpr size_t ALIGNED_SIZE = ((sizeof(T) - 1) / ALIGNMENT + 1) * ALIGNMENT;
+
+    static_assert(MinNumAllocs >= 1, "MinNumAllocs");
+    static_assert(MaxNumAllocs >= MinNumAllocs, "MaxNumAllocs");
+    static_assert(ALIGNED_SIZE >= sizeof(T*), "ALIGNED_SIZE");
+    static_assert(0 == (ALIGNED_SIZE % sizeof(T*)), "ALIGNED_SIZE mod");
+    static_assert(ALIGNMENT >= sizeof(T*), "ALIGNMENT");
+
+    T* mHead{nullptr};
+    T** mListForFree{nullptr};
+};
+
+template <typename T, size_t MinSize, size_t MaxSize, bool IsFlat>
+struct NodeAllocator;
+
+// dummy allocator that does nothing
+template <typename T, size_t MinSize, size_t MaxSize>
+struct NodeAllocator<T, MinSize, MaxSize, true> {
+
+    // we are not using the data, so just free it.
+ void addOrFree(void* ptr, size_t ROBIN_HOOD_UNUSED(numBytes) /*unused*/) noexcept { + ROBIN_HOOD_LOG("std::free") + std::free(ptr); + } +}; + +template +struct NodeAllocator : public BulkPoolAllocator {}; + +// c++14 doesn't have is_nothrow_swappable, and clang++ 6.0.1 doesn't like it either, so I'm making +// my own here. +namespace swappable { +#if ROBIN_HOOD(CXX) < ROBIN_HOOD(CXX17) +using std::swap; +template +struct nothrow { + static const bool value = noexcept(swap(std::declval(), std::declval())); +}; +#else +template +struct nothrow { + static const bool value = std::is_nothrow_swappable::value; +}; +#endif +} // namespace swappable + +} // namespace detail + +struct is_transparent_tag {}; + +// A custom pair implementation is used in the map because std::pair is not is_trivially_copyable, +// which means it would not be allowed to be used in std::memcpy. This struct is copyable, which is +// also tested. +template +struct pair { + using first_type = T1; + using second_type = T2; + + template ::value && + std::is_default_constructible::value>::type> + constexpr pair() noexcept(noexcept(U1()) && noexcept(U2())) + : first() + , second() {} + + // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. + explicit constexpr pair(std::pair const& o) noexcept( + noexcept(T1(std::declval())) && noexcept(T2(std::declval()))) + : first(o.first) + , second(o.second) {} + + // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. + explicit constexpr pair(std::pair&& o) noexcept(noexcept( + T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) + : first(std::move(o.first)) + , second(std::move(o.second)) {} + + constexpr pair(T1&& a, T2&& b) noexcept(noexcept( + T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) + : first(std::move(a)) + , second(std::move(b)) {} + + template + constexpr pair(U1&& a, U2&& b) noexcept(noexcept(T1(std::forward( + std::declval()))) && noexcept(T2(std::forward(std::declval())))) + : first(std::forward(a)) + , second(std::forward(b)) {} + + template + // MSVC 2015 produces error "C2476: ‘constexpr’ constructor does not initialize all members" + // if this constructor is constexpr +#if !ROBIN_HOOD(BROKEN_CONSTEXPR) + constexpr +#endif + pair(std::piecewise_construct_t /*unused*/, std::tuple a, + std::tuple + b) noexcept(noexcept(pair(std::declval&>(), + std::declval&>(), + ROBIN_HOOD_STD::index_sequence_for(), + ROBIN_HOOD_STD::index_sequence_for()))) + : pair(a, b, ROBIN_HOOD_STD::index_sequence_for(), + ROBIN_HOOD_STD::index_sequence_for()) { + } + + // constructor called from the std::piecewise_construct_t ctor + template + pair(std::tuple& a, std::tuple& b, ROBIN_HOOD_STD::index_sequence /*unused*/, ROBIN_HOOD_STD::index_sequence /*unused*/) noexcept( + noexcept(T1(std::forward(std::get( + std::declval&>()))...)) && noexcept(T2(std:: + forward(std::get( + std::declval&>()))...))) + : first(std::forward(std::get(a))...) + , second(std::forward(std::get(b))...) { + // make visual studio compiler happy about warning about unused a & b. + // Visual studio's pair implementation disables warning 4100. 
+        (void)a;
+        (void)b;
+    }
+
+    void swap(pair<T1, T2>& o) noexcept((detail::swappable::nothrow<T1>::value) &&
+                                        (detail::swappable::nothrow<T2>::value)) {
+        using std::swap;
+        swap(first, o.first);
+        swap(second, o.second);
+    }
+
+    T1 first;  // NOLINT(misc-non-private-member-variables-in-classes)
+    T2 second; // NOLINT(misc-non-private-member-variables-in-classes)
+};
+
+template <typename A, typename B>
+inline void swap(pair<A, B>& a, pair<A, B>& b) noexcept(
+    noexcept(std::declval<pair<A, B>&>().swap(std::declval<pair<A, B>&>()))) {
+    a.swap(b);
+}
+
+template <typename A, typename B>
+inline constexpr bool operator==(pair<A, B> const& x, pair<A, B> const& y) {
+    return (x.first == y.first) && (x.second == y.second);
+}
+template <typename A, typename B>
+inline constexpr bool operator!=(pair<A, B> const& x, pair<A, B> const& y) {
+    return !(x == y);
+}
+template <typename A, typename B>
+inline constexpr bool operator<(pair<A, B> const& x, pair<A, B> const& y) noexcept(noexcept(
+    std::declval<A const&>() < std::declval<A const&>()) && noexcept(std::declval<B const&>() <
+                                                                     std::declval<B const&>())) {
+    return x.first < y.first || (!(y.first < x.first) && x.second < y.second);
+}
+template <typename A, typename B>
+inline constexpr bool operator>(pair<A, B> const& x, pair<A, B> const& y) {
+    return y < x;
+}
+template <typename A, typename B>
+inline constexpr bool operator<=(pair<A, B> const& x, pair<A, B> const& y) {
+    return !(x > y);
+}
+template <typename A, typename B>
+inline constexpr bool operator>=(pair<A, B> const& x, pair<A, B> const& y) {
+    return !(x < y);
+}
+
+inline size_t hash_bytes(void const* ptr, size_t len) noexcept {
+    static constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995);
+    static constexpr uint64_t seed = UINT64_C(0xe17a1465);
+    static constexpr unsigned int r = 47;
+
+    auto const* const data64 = static_cast<uint64_t const*>(ptr);
+    uint64_t h = seed ^ (len * m);
+
+    size_t const n_blocks = len / 8;
+    for (size_t i = 0; i < n_blocks; ++i) {
+        auto k = detail::unaligned_load<uint64_t>(data64 + i);
+
+        k *= m;
+        k ^= k >> r;
+        k *= m;
+
+        h ^= k;
+        h *= m;
+    }
+
+    auto const* const data8 = reinterpret_cast<uint8_t const*>(data64 + n_blocks);
+    switch (len & 7U) {
+    case 7:
+        h ^= static_cast<uint64_t>(data8[6]) << 48U;
+        ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+    case 6:
+        h ^= static_cast<uint64_t>(data8[5]) << 40U;
+        ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+    case 5:
+        h ^= static_cast<uint64_t>(data8[4]) << 32U;
+        ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+    case 4:
+        h ^= static_cast<uint64_t>(data8[3]) << 24U;
+        ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+    case 3:
+        h ^= static_cast<uint64_t>(data8[2]) << 16U;
+        ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+    case 2:
+        h ^= static_cast<uint64_t>(data8[1]) << 8U;
+        ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+    case 1:
+        h ^= static_cast<uint64_t>(data8[0]);
+        h *= m;
+        ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+    default:
+        break;
+    }
+
+    h ^= h >> r;
+
+    // not doing the final step here, because this will be done by keyToIdx anyways
+    // h *= m;
+    // h ^= h >> r;
+    return static_cast<size_t>(h);
+}
+
+inline size_t hash_int(uint64_t x) noexcept {
+    // tried lots of different hashes, let's stick with murmurhash3. It's simple, fast, well tested,
+    // and doesn't need any special 128bit operations.
+    x ^= x >> 33U;
+    x *= UINT64_C(0xff51afd7ed558ccd);
+    x ^= x >> 33U;
+
+    // not doing the final step here, because this will be done by keyToIdx anyways
+    // x *= UINT64_C(0xc4ceb9fe1a85ec53);
+    // x ^= x >> 33U;
+    return static_cast<size_t>(x);
+}
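(Editor's aside: a quick, runnable demonstration of the two hashing entry points just defined. Both deliberately skip the final avalanche step, since the comments above note that keyToIdx() finishes the mixing, so these values are pre-finalization.)

    #include <cstdint>
    #include <cstdio>
    #include "robin_hood.h"

    int main() {
        // murmurhash3-style mix of a 64-bit integer key
        size_t hi = robin_hood::hash_int(UINT64_C(12345));

        // byte-wise murmur hash of an arbitrary buffer
        char const msg[] = "hello";
        size_t hb = robin_hood::hash_bytes(msg, sizeof(msg) - 1);

        std::printf("hash_int: %zx, hash_bytes: %zx\n", hi, hb);
    }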
+// A thin wrapper around std::hash, performing an additional simple mixing step of the result.
+template <typename T, typename Enable = void>
+struct hash : public std::hash<T> {
+    size_t operator()(T const& obj) const
+        noexcept(noexcept(std::declval<std::hash<T>>().operator()(std::declval<T const&>()))) {
+        // call base hash
+        auto result = std::hash<T>::operator()(obj);
+        // return a mix of that, to be safe against identity hashes
+        return hash_int(static_cast<detail::SizeT>(result));
+    }
+};
+
+template <typename CharT>
+struct hash<std::basic_string<CharT>> {
+    size_t operator()(std::basic_string<CharT> const& str) const noexcept {
+        return hash_bytes(str.data(), sizeof(CharT) * str.size());
+    }
+};
+
+#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17)
+template <typename CharT>
+struct hash<std::basic_string_view<CharT>> {
+    size_t operator()(std::basic_string_view<CharT> const& sv) const noexcept {
+        return hash_bytes(sv.data(), sizeof(CharT) * sv.size());
+    }
+};
+#endif
+
+template <class T>
+struct hash<T*> {
+    size_t operator()(T* ptr) const noexcept {
+        return hash_int(reinterpret_cast<detail::SizeT>(ptr));
+    }
+};
+
+template <class T>
+struct hash<std::unique_ptr<T>> {
+    size_t operator()(std::unique_ptr<T> const& ptr) const noexcept {
+        return hash_int(reinterpret_cast<detail::SizeT>(ptr.get()));
+    }
+};
+
+template <class T>
+struct hash<std::shared_ptr<T>> {
+    size_t operator()(std::shared_ptr<T> const& ptr) const noexcept {
+        return hash_int(reinterpret_cast<detail::SizeT>(ptr.get()));
+    }
+};
+
+template <typename Enum>
+struct hash<Enum, typename std::enable_if<std::is_enum<Enum>::value>::type> {
+    size_t operator()(Enum e) const noexcept {
+        using Underlying = typename std::underlying_type<Enum>::type;
+        return hash<Underlying>{}(static_cast<Underlying>(e));
+    }
+};
+
+#define ROBIN_HOOD_HASH_INT(T)                           \
+    template <>                                          \
+    struct hash<T> {                                     \
+        size_t operator()(T const& obj) const noexcept { \
+            return hash_int(static_cast<uint64_t>(obj)); \
+        }                                                \
+    }
+
+#if defined(__GNUC__) && !defined(__clang__)
+#    pragma GCC diagnostic push
+#    pragma GCC diagnostic ignored "-Wuseless-cast"
+#endif
+// see https://en.cppreference.com/w/cpp/utility/hash
+ROBIN_HOOD_HASH_INT(bool);
+ROBIN_HOOD_HASH_INT(char);
+ROBIN_HOOD_HASH_INT(signed char);
+ROBIN_HOOD_HASH_INT(unsigned char);
+ROBIN_HOOD_HASH_INT(char16_t);
+ROBIN_HOOD_HASH_INT(char32_t);
+#if ROBIN_HOOD(HAS_NATIVE_WCHART)
+ROBIN_HOOD_HASH_INT(wchar_t);
+#endif
+ROBIN_HOOD_HASH_INT(short);
+ROBIN_HOOD_HASH_INT(unsigned short);
+ROBIN_HOOD_HASH_INT(int);
+ROBIN_HOOD_HASH_INT(unsigned int);
+ROBIN_HOOD_HASH_INT(long);
+ROBIN_HOOD_HASH_INT(long long);
+ROBIN_HOOD_HASH_INT(unsigned long);
+ROBIN_HOOD_HASH_INT(unsigned long long);
+#if defined(__GNUC__) && !defined(__clang__)
+#    pragma GCC diagnostic pop
+#endif
+namespace detail {
+
+template <typename T>
+struct void_type {
+    using type = void;
+};
+
+template <typename T, typename = void>
+struct has_is_transparent : public std::false_type {};
+
+template <typename T>
+struct has_is_transparent<T, typename void_type<typename T::is_transparent>::type>
+    : public std::true_type {};
+
+// using wrapper classes for hash and key_equal prevents the diamond problem when the same type
+// is used. see https://stackoverflow.com/a/28771920/48181
+template <typename T>
+struct WrapHash : public T {
+    WrapHash() = default;
+    explicit WrapHash(T const& o) noexcept(noexcept(T(std::declval<T const&>())))
+        : T(o) {}
+};
+
+template <typename T>
+struct WrapKeyEqual : public T {
+    WrapKeyEqual() = default;
+    explicit WrapKeyEqual(T const& o) noexcept(noexcept(T(std::declval<T const&>())))
+        : T(o) {}
+};
+
+// A highly optimized hashmap implementation, using the Robin Hood algorithm.
+//
+// In most cases, this map should be usable as a drop-in replacement for std::unordered_map, but
+// it is about 2x faster in most cases and requires far fewer allocations.
+//
+// This implementation uses the following memory layout:
+//
+//   [Node, Node, ... Node | info, info, ... infoSentinel ]
+//
+// * Node: either a DataNode that directly has the std::pair<Key, T> as member,
+//   or a DataNode with a pointer to std::pair<Key, T>.
Which DataNode representation to use +// depends on how fast the swap() operation is. Heuristically, this is automatically choosen +// based on sizeof(). there are always 2^n Nodes. +// +// * info: Each Node in the map has a corresponding info byte, so there are 2^n info bytes. +// Each byte is initialized to 0, meaning the corresponding Node is empty. Set to 1 means the +// corresponding node contains data. Set to 2 means the corresponding Node is filled, but it +// actually belongs to the previous position and was pushed out because that place is already +// taken. +// +// * infoSentinel: Sentinel byte set to 1, so that iterator's ++ can stop at end() without the +// need for a idx variable. +// +// According to STL, order of templates has effect on throughput. That's why I've moved the +// boolean to the front. +// https://www.reddit.com/r/cpp/comments/ahp6iu/compile_time_binary_size_reductions_and_cs_future/eeguck4/ +template +class Table + : public WrapHash, + public WrapKeyEqual, + detail::NodeAllocator< + typename std::conditional< + std::is_void::value, Key, + robin_hood::pair::type, T>>::type, + 4, 16384, IsFlat> { +public: + static constexpr bool is_flat = IsFlat; + static constexpr bool is_map = !std::is_void::value; + static constexpr bool is_set = !is_map; + static constexpr bool is_transparent = + has_is_transparent::value && has_is_transparent::value; + + using key_type = Key; + using mapped_type = T; + using value_type = typename std::conditional< + is_set, Key, + robin_hood::pair::type, T>>::type; + using size_type = size_t; + using hasher = Hash; + using key_equal = KeyEqual; + using Self = Table; + +private: + static_assert(MaxLoadFactor100 > 10 && MaxLoadFactor100 < 100, + "MaxLoadFactor100 needs to be >10 && < 100"); + + using WHash = WrapHash; + using WKeyEqual = WrapKeyEqual; + + // configuration defaults + + // make sure we have 8 elements, needed to quickly rehash mInfo + static constexpr size_t InitialNumElements = sizeof(uint64_t); + static constexpr uint32_t InitialInfoNumBits = 5; + static constexpr uint8_t InitialInfoInc = 1U << InitialInfoNumBits; + static constexpr size_t InfoMask = InitialInfoInc - 1U; + static constexpr uint8_t InitialInfoHashShift = 0; + using DataPool = detail::NodeAllocator; + + // type needs to be wider than uint8_t. + using InfoType = uint32_t; + + // DataNode //////////////////////////////////////////////////////// + + // Primary template for the data node. We have special implementations for small and big + // objects. For large objects it is assumed that swap() is fairly slow, so we allocate these + // on the heap so swap merely swaps a pointer. + template + class DataNode {}; + + // Small: just allocate on the stack. + template + class DataNode final { + public: + template + explicit DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, Args&&... args) noexcept( + noexcept(value_type(std::forward(args)...))) + : mData(std::forward(args)...) 
{} + + DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept( + std::is_nothrow_move_constructible::value) + : mData(std::move(n.mData)) {} + + // doesn't do anything + void destroy(M& ROBIN_HOOD_UNUSED(map) /*unused*/) noexcept {} + void destroyDoNotDeallocate() noexcept {} + + value_type const* operator->() const noexcept { + return &mData; + } + value_type* operator->() noexcept { + return &mData; + } + + const value_type& operator*() const noexcept { + return mData; + } + + value_type& operator*() noexcept { + return mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return mData.first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type + getFirst() const noexcept { + return mData.first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() const noexcept { + return mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() noexcept { + return mData.second; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() const noexcept { + return mData.second; + } + + void swap(DataNode& o) noexcept( + noexcept(std::declval().swap(std::declval()))) { + mData.swap(o.mData); + } + + private: + value_type mData; + }; + + // big object: allocate on heap. + template + class DataNode { + public: + template + explicit DataNode(M& map, Args&&... args) + : mData(map.allocate()) { + ::new (static_cast(mData)) value_type(std::forward(args)...); + } + + DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode&& n) noexcept + : mData(std::move(n.mData)) {} + + void destroy(M& map) noexcept { + // don't deallocate, just put it into list of datapool. + mData->~value_type(); + map.deallocate(mData); + } + + void destroyDoNotDeallocate() noexcept { + mData->~value_type(); + } + + value_type const* operator->() const noexcept { + return mData; + } + + value_type* operator->() noexcept { + return mData; + } + + const value_type& operator*() const { + return *mData; + } + + value_type& operator*() { + return *mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return mData->first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() noexcept { + return *mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type + getFirst() const noexcept { + return mData->first; + } + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getFirst() const noexcept { + return *mData; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() noexcept { + return mData->second; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::type getSecond() const noexcept { + return mData->second; + } + + void swap(DataNode& o) noexcept { + using std::swap; + swap(mData, o.mData); + } + + private: + value_type* mData; + }; + + using Node = DataNode; + + // helpers for insertKeyPrepareEmptySpot: extract first entry (only const required) + ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(Node const& n) const noexcept { + return n.getFirst(); + } + + // in case we have void mapped_type, we are not using a pair, thus we just route k through. + // No need to disable this because it's just not used if not applicable. 
+ ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(key_type const& k) const noexcept { + return k; + } + + // in case we have non-void mapped_type, we have a standard robin_hood::pair + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::value, key_type const&>::type + getFirstConst(value_type const& vt) const noexcept { + return vt.first; + } + + // Cloner ////////////////////////////////////////////////////////// + + template + struct Cloner; + + // fast path: Just copy data, without allocating anything. + template + struct Cloner { + void operator()(M const& source, M& target) const { + auto const* const src = reinterpret_cast(source.mKeyVals); + auto* tgt = reinterpret_cast(target.mKeyVals); + auto const numElementsWithBuffer = target.calcNumElementsWithBuffer(target.mMask + 1); + std::copy(src, src + target.calcNumBytesTotal(numElementsWithBuffer), tgt); + } + }; + + template + struct Cloner { + void operator()(M const& s, M& t) const { + auto const numElementsWithBuffer = t.calcNumElementsWithBuffer(t.mMask + 1); + std::copy(s.mInfo, s.mInfo + t.calcNumBytesInfo(numElementsWithBuffer), t.mInfo); + + for (size_t i = 0; i < numElementsWithBuffer; ++i) { + if (t.mInfo[i]) { + ::new (static_cast(t.mKeyVals + i)) Node(t, *s.mKeyVals[i]); + } + } + } + }; + + // Destroyer /////////////////////////////////////////////////////// + + template + struct Destroyer {}; + + template + struct Destroyer { + void nodes(M& m) const noexcept { + m.mNumElements = 0; + } + + void nodesDoNotDeallocate(M& m) const noexcept { + m.mNumElements = 0; + } + }; + + template + struct Destroyer { + void nodes(M& m) const noexcept { + m.mNumElements = 0; + // clear also resets mInfo to 0, that's sometimes not necessary. + auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); + + for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { + if (0 != m.mInfo[idx]) { + Node& n = m.mKeyVals[idx]; + n.destroy(m); + n.~Node(); + } + } + } + + void nodesDoNotDeallocate(M& m) const noexcept { + m.mNumElements = 0; + // clear also resets mInfo to 0, that's sometimes not necessary. + auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1); + for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) { + if (0 != m.mInfo[idx]) { + Node& n = m.mKeyVals[idx]; + n.destroyDoNotDeallocate(); + n.~Node(); + } + } + } + }; + + // Iter //////////////////////////////////////////////////////////// + + struct fast_forward_tag {}; + + // generic iterator for both const_iterator and iterator. + template + // NOLINTNEXTLINE(hicpp-special-member-functions,cppcoreguidelines-special-member-functions) + class Iter { + private: + using NodePtr = typename std::conditional::type; + + public: + using difference_type = std::ptrdiff_t; + using value_type = typename Self::value_type; + using reference = typename std::conditional::type; + using pointer = typename std::conditional::type; + using iterator_category = std::forward_iterator_tag; + + // default constructed iterator can be compared to itself, but WON'T return true when + // compared to end(). + Iter() = default; + + // Rule of zero: nothing specified. The conversion constructor is only enabled for + // iterator to const_iterator, so it doesn't accidentally work as a copy ctor. + + // Conversion constructor from iterator to const_iterator. 
+ template ::type> + // NOLINTNEXTLINE(hicpp-explicit-conversions) + Iter(Iter const& other) noexcept + : mKeyVals(other.mKeyVals) + , mInfo(other.mInfo) {} + + Iter(NodePtr valPtr, uint8_t const* infoPtr) noexcept + : mKeyVals(valPtr) + , mInfo(infoPtr) {} + + Iter(NodePtr valPtr, uint8_t const* infoPtr, + fast_forward_tag ROBIN_HOOD_UNUSED(tag) /*unused*/) noexcept + : mKeyVals(valPtr) + , mInfo(infoPtr) { + fastForward(); + } + + template ::type> + Iter& operator=(Iter const& other) noexcept { + mKeyVals = other.mKeyVals; + mInfo = other.mInfo; + return *this; + } + + // prefix increment. Undefined behavior if we are at end()! + Iter& operator++() noexcept { + mInfo++; + mKeyVals++; + fastForward(); + return *this; + } + + Iter operator++(int) noexcept { + Iter tmp = *this; + ++(*this); + return tmp; + } + + reference operator*() const { + return **mKeyVals; + } + + pointer operator->() const { + return &**mKeyVals; + } + + template + bool operator==(Iter const& o) const noexcept { + return mKeyVals == o.mKeyVals; + } + + template + bool operator!=(Iter const& o) const noexcept { + return mKeyVals != o.mKeyVals; + } + + private: + // fast forward to the next non-free info byte + // I've tried a few variants that don't depend on intrinsics, but unfortunately they are + // quite a bit slower than this one. So I've reverted that change again. See map_benchmark. + void fastForward() noexcept { + size_t n = 0; + while (0U == (n = detail::unaligned_load(mInfo))) { + mInfo += sizeof(size_t); + mKeyVals += sizeof(size_t); + } +#if defined(ROBIN_HOOD_DISABLE_INTRINSICS) + // we know for certain that within the next 8 bytes we'll find a non-zero one. + if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { + mInfo += 4; + mKeyVals += 4; + } + if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { + mInfo += 2; + mKeyVals += 2; + } + if (ROBIN_HOOD_UNLIKELY(0U == *mInfo)) { + mInfo += 1; + mKeyVals += 1; + } +#else +# if ROBIN_HOOD(LITTLE_ENDIAN) + auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8; +# else + auto inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8; +# endif + mInfo += inc; + mKeyVals += inc; +#endif + } + + friend class Table; + NodePtr mKeyVals{nullptr}; + uint8_t const* mInfo{nullptr}; + }; + + //////////////////////////////////////////////////////////////////// + + // highly performance relevant code. + // Lower bits are used for indexing into the array (2^n size) + // The upper 1-5 bits need to be a reasonable good hash, to save comparisons. + template + void keyToIdx(HashKey&& key, size_t* idx, InfoType* info) const { + // In addition to whatever hash is used, add another mul & shift so we get better hashing. + // This serves as a bad hash prevention, if the given data is + // badly mixed. + auto h = static_cast(WHash::operator()(key)); + + h *= mHashMultiplier; + h ^= h >> 33U; + + // the lower InitialInfoNumBits are reserved for info. + *info = mInfoInc + static_cast((h & InfoMask) >> mInfoHashShift); + *idx = (static_cast(h) >> InitialInfoNumBits) & mMask; + } + + // forwards the index by one, wrapping around at the end + void next(InfoType* info, size_t* idx) const noexcept { + *idx = *idx + 1; + *info += mInfoInc; + } + + void nextWhileLess(InfoType* info, size_t* idx) const noexcept { + // unrolling this by hand did not bring any speedups. + while (*info < mInfo[*idx]) { + next(info, idx); + } + } + + // Shift everything up by one element. Tries to move stuff around. 
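(Editor's aside: keyToIdx() above packs two things into one mixed 64-bit hash: the low InitialInfoNumBits bits become the info byte and the next log2(size) bits pick the bucket. A standalone sketch of that split, with a hypothetical 64-bucket table and mInfoHashShift at its initial value of 0:)

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    int main() {
        uint64_t h = UINT64_C(0xdeadbeefcafef00d); // an already-mixed hash
        size_t mask = 64 - 1;                      // mMask for 2^6 buckets
        uint32_t infoInc = 1U << 5;                // InitialInfoInc
        uint64_t infoMask = infoInc - 1;           // InfoMask: the low 5 bits

        // low bits become the "proximity" info byte, the next bits the bucket
        uint32_t info = infoInc + static_cast<uint32_t>(h & infoMask);
        size_t idx = (static_cast<size_t>(h) >> 5) & mask;

        std::printf("info=%u idx=%zu\n", info, idx);
    }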
+ void + shiftUp(size_t startIdx, + size_t const insertion_idx) noexcept(std::is_nothrow_move_assignable::value) { + auto idx = startIdx; + ::new (static_cast(mKeyVals + idx)) Node(std::move(mKeyVals[idx - 1])); + while (--idx != insertion_idx) { + mKeyVals[idx] = std::move(mKeyVals[idx - 1]); + } + + idx = startIdx; + while (idx != insertion_idx) { + ROBIN_HOOD_COUNT(shiftUp) + mInfo[idx] = static_cast(mInfo[idx - 1] + mInfoInc); + if (ROBIN_HOOD_UNLIKELY(mInfo[idx] + mInfoInc > 0xFF)) { + mMaxNumElementsAllowed = 0; + } + --idx; + } + } + + void shiftDown(size_t idx) noexcept(std::is_nothrow_move_assignable::value) { + // until we find one that is either empty or has zero offset. + // TODO(martinus) we don't need to move everything, just the last one for the same + // bucket. + mKeyVals[idx].destroy(*this); + + // until we find one that is either empty or has zero offset. + while (mInfo[idx + 1] >= 2 * mInfoInc) { + ROBIN_HOOD_COUNT(shiftDown) + mInfo[idx] = static_cast(mInfo[idx + 1] - mInfoInc); + mKeyVals[idx] = std::move(mKeyVals[idx + 1]); + ++idx; + } + + mInfo[idx] = 0; + // don't destroy, we've moved it + // mKeyVals[idx].destroy(*this); + mKeyVals[idx].~Node(); + } + + // copy of find(), except that it returns iterator instead of const_iterator. + template + ROBIN_HOOD(NODISCARD) + size_t findIdx(Other const& key) const { + size_t idx{}; + InfoType info{}; + keyToIdx(key, &idx, &info); + + do { + // unrolling this twice gives a bit of a speedup. More unrolling did not help. + if (info == mInfo[idx] && + ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { + return idx; + } + next(&info, &idx); + if (info == mInfo[idx] && + ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) { + return idx; + } + next(&info, &idx); + } while (info <= mInfo[idx]); + + // nothing found! + return mMask == 0 ? 0 + : static_cast(std::distance( + mKeyVals, reinterpret_cast_no_cast_align_warning(mInfo))); + } + + void cloneData(const Table& o) { + Cloner()(o, *this); + } + + // inserts a keyval that is guaranteed to be new, e.g. when the hashmap is resized. + // @return True on success, false if something went wrong + void insert_move(Node&& keyval) { + // we don't retry, fail if overflowing + // don't need to check max num elements + if (0 == mMaxNumElementsAllowed && !try_increase_info()) { + throwOverflowError(); + } + + size_t idx{}; + InfoType info{}; + keyToIdx(keyval.getFirst(), &idx, &info); + + // skip forward. Use <= because we are certain that the element is not there. + while (info <= mInfo[idx]) { + idx = idx + 1; + info += mInfoInc; + } + + // key not found, so we are now exactly where we want to insert it. + auto const insertion_idx = idx; + auto const insertion_info = static_cast(info); + if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { + mMaxNumElementsAllowed = 0; + } + + // find an empty spot + while (0 != mInfo[idx]) { + next(&info, &idx); + } + + auto& l = mKeyVals[insertion_idx]; + if (idx == insertion_idx) { + ::new (static_cast(&l)) Node(std::move(keyval)); + } else { + shiftUp(idx, insertion_idx); + l = std::move(keyval); + } + + // put at empty spot + mInfo[insertion_idx] = insertion_info; + + ++mNumElements; + } + +public: + using iterator = Iter; + using const_iterator = Iter; + + Table() noexcept(noexcept(Hash()) && noexcept(KeyEqual())) + : WHash() + , WKeyEqual() { + ROBIN_HOOD_TRACE(this) + } + + // Creates an empty hash map. Nothing is allocated yet, this happens at the first insert. 
+ // This tremendously speeds up ctor & dtor of a map that never receives an element. The + // penalty is payed at the first insert, and not before. Lookup of this empty map works + // because everybody points to DummyInfoByte::b. parameter bucket_count is dictated by the + // standard, but we can ignore it. + explicit Table( + size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/, const Hash& h = Hash{}, + const KeyEqual& equal = KeyEqual{}) noexcept(noexcept(Hash(h)) && noexcept(KeyEqual(equal))) + : WHash(h) + , WKeyEqual(equal) { + ROBIN_HOOD_TRACE(this) + } + + template + Table(Iter first, Iter last, size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, + const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{}) + : WHash(h) + , WKeyEqual(equal) { + ROBIN_HOOD_TRACE(this) + insert(first, last); + } + + Table(std::initializer_list initlist, + size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{}, + const KeyEqual& equal = KeyEqual{}) + : WHash(h) + , WKeyEqual(equal) { + ROBIN_HOOD_TRACE(this) + insert(initlist.begin(), initlist.end()); + } + + Table(Table&& o) noexcept + : WHash(std::move(static_cast(o))) + , WKeyEqual(std::move(static_cast(o))) + , DataPool(std::move(static_cast(o))) { + ROBIN_HOOD_TRACE(this) + if (o.mMask) { + mHashMultiplier = std::move(o.mHashMultiplier); + mKeyVals = std::move(o.mKeyVals); + mInfo = std::move(o.mInfo); + mNumElements = std::move(o.mNumElements); + mMask = std::move(o.mMask); + mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); + mInfoInc = std::move(o.mInfoInc); + mInfoHashShift = std::move(o.mInfoHashShift); + // set other's mask to 0 so its destructor won't do anything + o.init(); + } + } + + Table& operator=(Table&& o) noexcept { + ROBIN_HOOD_TRACE(this) + if (&o != this) { + if (o.mMask) { + // only move stuff if the other map actually has some data + destroy(); + mHashMultiplier = std::move(o.mHashMultiplier); + mKeyVals = std::move(o.mKeyVals); + mInfo = std::move(o.mInfo); + mNumElements = std::move(o.mNumElements); + mMask = std::move(o.mMask); + mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed); + mInfoInc = std::move(o.mInfoInc); + mInfoHashShift = std::move(o.mInfoHashShift); + WHash::operator=(std::move(static_cast(o))); + WKeyEqual::operator=(std::move(static_cast(o))); + DataPool::operator=(std::move(static_cast(o))); + + o.init(); + + } else { + // nothing in the other map => just clear us. + clear(); + } + } + return *this; + } + + Table(const Table& o) + : WHash(static_cast(o)) + , WKeyEqual(static_cast(o)) + , DataPool(static_cast(o)) { + ROBIN_HOOD_TRACE(this) + if (!o.empty()) { + // not empty: create an exact copy. it is also possible to just iterate through all + // elements and insert them, but copying is probably faster. + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + + ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mHashMultiplier = o.mHashMultiplier; + mKeyVals = static_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); + // no need for calloc because clonData does memcpy + mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); + mNumElements = o.mNumElements; + mMask = o.mMask; + mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; + mInfoInc = o.mInfoInc; + mInfoHashShift = o.mInfoHashShift; + cloneData(o); + } + } + + // Creates a copy of the given map. Copy constructor of each entry is used. 
+ // Not sure why clang-tidy thinks this doesn't handle self assignment, it does + // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) + Table& operator=(Table const& o) { + ROBIN_HOOD_TRACE(this) + if (&o == this) { + // prevent assigning of itself + return *this; + } + + // we keep using the old allocator and not assign the new one, because we want to keep + // the memory available. when it is the same size. + if (o.empty()) { + if (0 == mMask) { + // nothing to do, we are empty too + return *this; + } + + // not empty: destroy what we have there + // clear also resets mInfo to 0, that's sometimes not necessary. + destroy(); + init(); + WHash::operator=(static_cast(o)); + WKeyEqual::operator=(static_cast(o)); + DataPool::operator=(static_cast(o)); + + return *this; + } + + // clean up old stuff + Destroyer::value>{}.nodes(*this); + + if (mMask != o.mMask) { + // no luck: we don't have the same array size allocated, so we need to realloc. + if (0 != mMask) { + // only deallocate if we actually have data! + ROBIN_HOOD_LOG("std::free") + std::free(mKeyVals); + } + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mKeyVals = static_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); + + // no need for calloc here because cloneData performs a memcpy. + mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); + // sentinel is set in cloneData + } + WHash::operator=(static_cast(o)); + WKeyEqual::operator=(static_cast(o)); + DataPool::operator=(static_cast(o)); + mHashMultiplier = o.mHashMultiplier; + mNumElements = o.mNumElements; + mMask = o.mMask; + mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; + mInfoInc = o.mInfoInc; + mInfoHashShift = o.mInfoHashShift; + cloneData(o); + + return *this; + } + + // Swaps everything between the two maps. + void swap(Table& o) { + ROBIN_HOOD_TRACE(this) + using std::swap; + swap(o, *this); + } + + // Clears all data, without resizing. + void clear() { + ROBIN_HOOD_TRACE(this) + if (empty()) { + // don't do anything! also important because we don't want to write to + // DummyInfoByte::b, even though we would just write 0 to it. + return; + } + + Destroyer::value>{}.nodes(*this); + + auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); + // clear everything, then set the sentinel again + uint8_t const z = 0; + std::fill(mInfo, mInfo + calcNumBytesInfo(numElementsWithBuffer), z); + mInfo[numElementsWithBuffer] = 1; + + mInfoInc = InitialInfoInc; + mInfoHashShift = InitialInfoHashShift; + } + + // Destroys the map and all it's contents. + ~Table() { + ROBIN_HOOD_TRACE(this) + destroy(); + } + + // Checks if both tables contain the same entries. Order is irrelevant. 
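(Editor's aside: everything from operator== down through erase() below implements the familiar std::unordered_map surface, so a short usage sketch may help orient the reading. unordered_flat_map is one of the public aliases this header defines further down, past the end of this hunk.)

    #include <cstdio>
    #include <string>
    #include "robin_hood.h"

    int main() {
        robin_hood::unordered_flat_map<std::string, int> counts;

        counts["apple"] += 1;      // operator[] default-constructs on a miss
        counts.emplace("pear", 2); // emplace forwards into the node

        auto it = counts.find("apple"); // find returns end() when absent
        if (it != counts.end()) {
            std::printf("%s -> %d\n", it->first.c_str(), it->second);
        }

        counts.erase("pear"); // erase-by-key returns the number removed (0 or 1)
    }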
+ bool operator==(const Table& other) const { + ROBIN_HOOD_TRACE(this) + if (other.size() != size()) { + return false; + } + for (auto const& otherEntry : other) { + if (!has(otherEntry)) { + return false; + } + } + + return true; + } + + bool operator!=(const Table& other) const { + ROBIN_HOOD_TRACE(this) + return !operator==(other); + } + + template + typename std::enable_if::value, Q&>::type operator[](const key_type& key) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) + Node(*this, std::piecewise_construct, std::forward_as_tuple(key), + std::forward_as_tuple()); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(key), std::forward_as_tuple()); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + } + + return mKeyVals[idxAndState.first].getSecond(); + } + + template + typename std::enable_if::value, Q&>::type operator[](key_type&& key) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) + Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), + std::forward_as_tuple()); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = + Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), + std::forward_as_tuple()); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + } + + return mKeyVals[idxAndState.first].getSecond(); + } + + template + void insert(Iter first, Iter last) { + for (; first != last; ++first) { + // value_type ctor needed because this might be called with std::pair's + insert(value_type(*first)); + } + } + + void insert(std::initializer_list ilist) { + for (auto&& vt : ilist) { + insert(std::move(vt)); + } + } + + template + std::pair emplace(Args&&... args) { + ROBIN_HOOD_TRACE(this) + Node n{*this, std::forward(args)...}; + auto idxAndState = insertKeyPrepareEmptySpot(getFirstConst(n)); + switch (idxAndState.second) { + case InsertionState::key_found: + n.destroy(*this); + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node(*this, std::move(n)); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = std::move(n); + break; + + case InsertionState::overflow_error: + n.destroy(*this); + throwOverflowError(); + break; + } + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + template + iterator emplace_hint(const_iterator position, Args&&... args) { + (void)position; + return emplace(std::forward(args)...).first; + } + + template + std::pair try_emplace(const key_type& key, Args&&... args) { + return try_emplace_impl(key, std::forward(args)...); + } + + template + std::pair try_emplace(key_type&& key, Args&&... args) { + return try_emplace_impl(std::move(key), std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, const key_type& key, Args&&... 
args) { + (void)hint; + return try_emplace_impl(key, std::forward(args)...).first; + } + + template + iterator try_emplace(const_iterator hint, key_type&& key, Args&&... args) { + (void)hint; + return try_emplace_impl(std::move(key), std::forward(args)...).first; + } + + template + std::pair insert_or_assign(const key_type& key, Mapped&& obj) { + return insertOrAssignImpl(key, std::forward(obj)); + } + + template + std::pair insert_or_assign(key_type&& key, Mapped&& obj) { + return insertOrAssignImpl(std::move(key), std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, const key_type& key, Mapped&& obj) { + (void)hint; + return insertOrAssignImpl(key, std::forward(obj)).first; + } + + template + iterator insert_or_assign(const_iterator hint, key_type&& key, Mapped&& obj) { + (void)hint; + return insertOrAssignImpl(std::move(key), std::forward(obj)).first; + } + + std::pair insert(const value_type& keyval) { + ROBIN_HOOD_TRACE(this) + return emplace(keyval); + } + + iterator insert(const_iterator hint, const value_type& keyval) { + (void)hint; + return emplace(keyval).first; + } + + std::pair insert(value_type&& keyval) { + return emplace(std::move(keyval)); + } + + iterator insert(const_iterator hint, value_type&& keyval) { + (void)hint; + return emplace(std::move(keyval)).first; + } + + // Returns 1 if key is found, 0 otherwise. + size_t count(const key_type& key) const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { + return 1; + } + return 0; + } + + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::type count(const OtherKey& key) const { + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { + return 1; + } + return 0; + } + + bool contains(const key_type& key) const { // NOLINT(modernize-use-nodiscard) + return 1U == count(key); + } + + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::type contains(const OtherKey& key) const { + return 1U == count(key); + } + + // Returns a reference to the value found for key. + // Throws std::out_of_range if element cannot be found + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::value, Q&>::type at(key_type const& key) { + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { + doThrow("key not found"); + } + return kv->getSecond(); + } + + // Returns a reference to the value found for key. 
+ // Throws std::out_of_range if element cannot be found + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::value, Q const&>::type at(key_type const& key) const { + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { + doThrow("key not found"); + } + return kv->getSecond(); + } + + const_iterator find(const key_type& key) const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + template + const_iterator find(const OtherKey& key, is_transparent_tag /*unused*/) const { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + template + typename std::enable_if::type // NOLINT(modernize-use-nodiscard) + find(const OtherKey& key) const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + iterator find(const key_type& key) { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + template + iterator find(const OtherKey& key, is_transparent_tag /*unused*/) { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + template + typename std::enable_if::type find(const OtherKey& key) { + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + iterator begin() { + ROBIN_HOOD_TRACE(this) + if (empty()) { + return end(); + } + return iterator(mKeyVals, mInfo, fast_forward_tag{}); + } + const_iterator begin() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return cbegin(); + } + const_iterator cbegin() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + if (empty()) { + return cend(); + } + return const_iterator(mKeyVals, mInfo, fast_forward_tag{}); + } + + iterator end() { + ROBIN_HOOD_TRACE(this) + // no need to supply valid info pointer: end() must not be dereferenced, and only node + // pointer is compared. + return iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; + } + const_iterator end() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return cend(); + } + const_iterator cend() const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return const_iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; + } + + iterator erase(const_iterator pos) { + ROBIN_HOOD_TRACE(this) + // its safe to perform const cast here + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) + return erase(iterator{const_cast(pos.mKeyVals), const_cast(pos.mInfo)}); + } + + // Erases element at pos, returns iterator to the next element. + iterator erase(iterator pos) { + ROBIN_HOOD_TRACE(this) + // we assume that pos always points to a valid entry, and not end(). 
+ auto const idx = static_cast(pos.mKeyVals - mKeyVals); + + shiftDown(idx); + --mNumElements; + + if (*pos.mInfo) { + // we've backward shifted, return this again + return pos; + } + + // no backward shift, return next element + return ++pos; + } + + size_t erase(const key_type& key) { + ROBIN_HOOD_TRACE(this) + size_t idx{}; + InfoType info{}; + keyToIdx(key, &idx, &info); + + // check while info matches with the source idx + do { + if (info == mInfo[idx] && WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { + shiftDown(idx); + --mNumElements; + return 1; + } + next(&info, &idx); + } while (info <= mInfo[idx]); + + // nothing found to delete + return 0; + } + + // reserves space for the specified number of elements. Makes sure the old data fits. + // exactly the same as reserve(c). + void rehash(size_t c) { + // forces a reserve + reserve(c, true); + } + + // reserves space for the specified number of elements. Makes sure the old data fits. + // Exactly the same as rehash(c). Use rehash(0) to shrink to fit. + void reserve(size_t c) { + // reserve, but don't force rehash + reserve(c, false); + } + + // If possible reallocates the map to a smaller one. This frees the underlying table. + // Does not do anything if load_factor is too large for decreasing the table's size. + void compact() { + ROBIN_HOOD_TRACE(this) + auto newSize = InitialNumElements; + while (calcMaxNumElementsAllowed(newSize) < mNumElements && newSize != 0) { + newSize *= 2; + } + if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { + throwOverflowError(); + } + + ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") + + // only actually do anything when the new size is bigger than the old one. This prevents to + // continuously allocate for each reserve() call. + if (newSize < mMask + 1) { + rehashPowerOfTwo(newSize, true); + } + } + + size_type size() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return mNumElements; + } + + size_type max_size() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return static_cast(-1); + } + + ROBIN_HOOD(NODISCARD) bool empty() const noexcept { + ROBIN_HOOD_TRACE(this) + return 0 == mNumElements; + } + + float max_load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return MaxLoadFactor100 / 100.0F; + } + + // Average number of elements per bucket. Since we allow only 1 per bucket + float load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) + return static_cast(size()) / static_cast(mMask + 1); + } + + ROBIN_HOOD(NODISCARD) size_t mask() const noexcept { + ROBIN_HOOD_TRACE(this) + return mMask; + } + + ROBIN_HOOD(NODISCARD) size_t calcMaxNumElementsAllowed(size_t maxElements) const noexcept { + if (ROBIN_HOOD_LIKELY(maxElements <= (std::numeric_limits::max)() / 100)) { + return maxElements * MaxLoadFactor100 / 100; + } + + // we might be a bit inprecise, but since maxElements is quite large that doesn't matter + return (maxElements / 100) * MaxLoadFactor100; + } + + ROBIN_HOOD(NODISCARD) size_t calcNumBytesInfo(size_t numElements) const noexcept { + // we add a uint64_t, which houses the sentinel (first byte) and padding so we can load + // 64bit types. 
+ return numElements + sizeof(uint64_t); + } + + ROBIN_HOOD(NODISCARD) + size_t calcNumElementsWithBuffer(size_t numElements) const noexcept { + auto maxNumElementsAllowed = calcMaxNumElementsAllowed(numElements); + return numElements + (std::min)(maxNumElementsAllowed, (static_cast(0xFF))); + } + + // calculation only allowed for 2^n values + ROBIN_HOOD(NODISCARD) size_t calcNumBytesTotal(size_t numElements) const { +#if ROBIN_HOOD(BITNESS) == 64 + return numElements * sizeof(Node) + calcNumBytesInfo(numElements); +#else + // make sure we're doing 64bit operations, so we are at least safe against 32bit overflows. + auto const ne = static_cast(numElements); + auto const s = static_cast(sizeof(Node)); + auto const infos = static_cast(calcNumBytesInfo(numElements)); + + auto const total64 = ne * s + infos; + auto const total = static_cast(total64); + + if (ROBIN_HOOD_UNLIKELY(static_cast(total) != total64)) { + throwOverflowError(); + } + return total; +#endif + } + +private: + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::value, bool>::type has(const value_type& e) const { + ROBIN_HOOD_TRACE(this) + auto it = find(e.first); + return it != end() && it->second == e.second; + } + + template + ROBIN_HOOD(NODISCARD) + typename std::enable_if::value, bool>::type has(const value_type& e) const { + ROBIN_HOOD_TRACE(this) + return find(e) != end(); + } + + void reserve(size_t c, bool forceRehash) { + ROBIN_HOOD_TRACE(this) + auto const minElementsAllowed = (std::max)(c, mNumElements); + auto newSize = InitialNumElements; + while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) { + newSize *= 2; + } + if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { + throwOverflowError(); + } + + ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") + + // only actually do anything when the new size is bigger than the old one. This prevents to + // continuously allocate for each reserve() call. + if (forceRehash || newSize > mMask + 1) { + rehashPowerOfTwo(newSize, false); + } + } + + // reserves space for at least the specified number of elements. + // only works if numBuckets if power of two + // True on success, false otherwise + void rehashPowerOfTwo(size_t numBuckets, bool forceFree) { + ROBIN_HOOD_TRACE(this) + + Node* const oldKeyVals = mKeyVals; + uint8_t const* const oldInfo = mInfo; + + const size_t oldMaxElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); + + // resize operation: move stuff + initData(numBuckets); + if (oldMaxElementsWithBuffer > 1) { + for (size_t i = 0; i < oldMaxElementsWithBuffer; ++i) { + if (oldInfo[i] != 0) { + // might throw an exception, which is really bad since we are in the middle of + // moving stuff. + insert_move(std::move(oldKeyVals[i])); + // destroy the node but DON'T destroy the data. + oldKeyVals[i].~Node(); + } + } + + // this check is not necessary as it's guarded by the previous if, but it helps + // silence g++'s overeager "attempt to free a non-heap object 'map' + // [-Werror=free-nonheap-object]" warning. 
+    // reserves space for at least the specified number of elements.
+    // only works if numBuckets is a power of two
+    void rehashPowerOfTwo(size_t numBuckets, bool forceFree) {
+        ROBIN_HOOD_TRACE(this)
+
+        Node* const oldKeyVals = mKeyVals;
+        uint8_t const* const oldInfo = mInfo;
+
+        const size_t oldMaxElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1);
+
+        // resize operation: move stuff
+        initData(numBuckets);
+        if (oldMaxElementsWithBuffer > 1) {
+            for (size_t i = 0; i < oldMaxElementsWithBuffer; ++i) {
+                if (oldInfo[i] != 0) {
+                    // might throw an exception, which is really bad since we are in the middle of
+                    // moving stuff.
+                    insert_move(std::move(oldKeyVals[i]));
+                    // destroy the node but DON'T destroy the data.
+                    oldKeyVals[i].~Node();
+                }
+            }
+
+            // this check is not necessary as it's guarded by the previous if, but it helps
+            // silence g++'s overeager "attempt to free a non-heap object 'map'
+            // [-Werror=free-nonheap-object]" warning.
+            if (oldKeyVals != reinterpret_cast_no_cast_align_warning<Node*>(&mMask)) {
+                // don't destroy old data: put it into the pool instead
+                if (forceFree) {
+                    std::free(oldKeyVals);
+                } else {
+                    DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer));
+                }
+            }
+        }
+    }
+
+    ROBIN_HOOD(NOINLINE) void throwOverflowError() const {
+#if ROBIN_HOOD(HAS_EXCEPTIONS)
+        throw std::overflow_error("robin_hood::map overflow");
+#else
+        abort();
+#endif
+    }
+
+    template <typename OtherKey, typename... Args>
+    std::pair<iterator, bool> try_emplace_impl(OtherKey&& key, Args&&... args) {
+        ROBIN_HOOD_TRACE(this)
+        auto idxAndState = insertKeyPrepareEmptySpot(key);
+        switch (idxAndState.second) {
+        case InsertionState::key_found:
+            break;
+
+        case InsertionState::new_node:
+            ::new (static_cast<void*>(&mKeyVals[idxAndState.first])) Node(
+                *this, std::piecewise_construct,
+                std::forward_as_tuple(std::forward<OtherKey>(key)),
+                std::forward_as_tuple(std::forward<Args>(args)...));
+            break;
+
+        case InsertionState::overwrite_node:
+            mKeyVals[idxAndState.first] =
+                Node(*this, std::piecewise_construct,
+                     std::forward_as_tuple(std::forward<OtherKey>(key)),
+                     std::forward_as_tuple(std::forward<Args>(args)...));
+            break;
+
+        case InsertionState::overflow_error:
+            throwOverflowError();
+            break;
+        }
+
+        return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first),
+                              InsertionState::key_found != idxAndState.second);
+    }
+
+    template <typename OtherKey, typename Mapped>
+    std::pair<iterator, bool> insertOrAssignImpl(OtherKey&& key, Mapped&& obj) {
+        ROBIN_HOOD_TRACE(this)
+        auto idxAndState = insertKeyPrepareEmptySpot(key);
+        switch (idxAndState.second) {
+        case InsertionState::key_found:
+            mKeyVals[idxAndState.first].getSecond() = std::forward<Mapped>(obj);
+            break;
+
+        case InsertionState::new_node:
+            ::new (static_cast<void*>(&mKeyVals[idxAndState.first])) Node(
+                *this, std::piecewise_construct,
+                std::forward_as_tuple(std::forward<OtherKey>(key)),
+                std::forward_as_tuple(std::forward<Mapped>(obj)));
+            break;
+
+        case InsertionState::overwrite_node:
+            mKeyVals[idxAndState.first] =
+                Node(*this, std::piecewise_construct,
+                     std::forward_as_tuple(std::forward<OtherKey>(key)),
+                     std::forward_as_tuple(std::forward<Mapped>(obj)));
+            break;
+
+        case InsertionState::overflow_error:
+            throwOverflowError();
+            break;
+        }
+
+        return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first),
+                              InsertionState::key_found != idxAndState.second);
+    }
+
+    void initData(size_t max_elements) {
+        mNumElements = 0;
+        mMask = max_elements - 1;
+        mMaxNumElementsAllowed = calcMaxNumElementsAllowed(max_elements);
+
+        auto const numElementsWithBuffer = calcNumElementsWithBuffer(max_elements);
+
+        // malloc & zero mInfo. Faster than calloc everything.
+        auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer);
+        ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal("
+                                      << numElementsWithBuffer << ")")
+        mKeyVals = reinterpret_cast<Node*>(
+            detail::assertNotNull<std::bad_alloc>(std::malloc(numBytesTotal)));
+        mInfo = reinterpret_cast<uint8_t*>(mKeyVals + numElementsWithBuffer);
+        std::memset(mInfo, 0, numBytesTotal - numElementsWithBuffer * sizeof(Node));
+
+        // set sentinel
+        mInfo[numElementsWithBuffer] = 1;
+
+        mInfoInc = InitialInfoInc;
+        mInfoHashShift = InitialInfoHashShift;
+    }
+
+    enum class InsertionState { overflow_error, key_found, new_node, overwrite_node };
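The `key_found` branch is where the two `_impl` functions diverge: `try_emplace_impl` leaves an existing value untouched, while `insertOrAssignImpl` overwrites it. A call-site sketch, assuming the public `try_emplace()`/`insert_or_assign()` wrappers dispatch here as in upstream robin_hood:

```cpp
#include <cstdio>
#include <string>

#include "robin_hood.h"

int main()
{
    robin_hood::unordered_map<std::string, int> m;

    m.try_emplace("answer", 42);
    auto res1 = m.try_emplace("answer", 0);  // key_found: existing value kept
    printf("try_emplace kept %d (inserted=%d)\n",
           res1.first->second, (int) res1.second);

    auto res2 = m.insert_or_assign("answer", 7);  // key_found: value overwritten
    printf("insert_or_assign wrote %d (inserted=%d)\n",
           res2.first->second, (int) res2.second);
    return 0;
}
```

Both calls return the `(iterator, inserted)` pair built at the bottom of each `_impl`, so the caller can always see whether a new node was created.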
+    // Finds key, and if not already present prepares a spot where to put the key & value.
+    // This potentially shifts nodes out of the way, updates mInfo and number of inserted
+    // elements, so the only operation left to do is create/assign a new node at that spot.
+    template <typename OtherKey>
+    std::pair<size_t, InsertionState> insertKeyPrepareEmptySpot(OtherKey&& key) {
+        for (int i = 0; i < 256; ++i) {
+            size_t idx{};
+            InfoType info{};
+            keyToIdx(key, &idx, &info);
+            nextWhileLess(&info, &idx);
+
+            // while we potentially have a match
+            while (info == mInfo[idx]) {
+                if (WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) {
+                    // key already exists, do NOT insert.
+                    // see http://en.cppreference.com/w/cpp/container/unordered_map/insert
+                    return std::make_pair(idx, InsertionState::key_found);
+                }
+                next(&info, &idx);
+            }
+
+            // unlikely that this evaluates to true
+            if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) {
+                if (!increase_size()) {
+                    return std::make_pair(size_t(0), InsertionState::overflow_error);
+                }
+                continue;
+            }
+
+            // key not found, so we are now exactly where we want to insert it.
+            auto const insertion_idx = idx;
+            auto const insertion_info = info;
+            if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) {
+                mMaxNumElementsAllowed = 0;
+            }
+
+            // find an empty spot
+            while (0 != mInfo[idx]) {
+                next(&info, &idx);
+            }
+
+            if (idx != insertion_idx) {
+                shiftUp(idx, insertion_idx);
+            }
+            // put at empty spot
+            mInfo[insertion_idx] = static_cast<uint8_t>(insertion_info);
+            ++mNumElements;
+            return std::make_pair(insertion_idx, idx == insertion_idx
+                                                     ? InsertionState::new_node
+                                                     : InsertionState::overwrite_node);
+        }
+
+        // enough attempts failed, so finally give up.
+        return std::make_pair(size_t(0), InsertionState::overflow_error);
+    }
+
+    bool try_increase_info() {
+        ROBIN_HOOD_LOG("mInfoInc=" << mInfoInc << ", numElements=" << mNumElements
+                                   << ", maxNumElementsAllowed="
+                                   << calcMaxNumElementsAllowed(mMask + 1))
+        if (mInfoInc <= 2) {
+            // need to be > 2 so that shift works (otherwise undefined behavior!)
+            return false;
+        }
+        // we got space left, try to make info smaller
+        mInfoInc = static_cast<InfoType>(mInfoInc >> 1U);
+
+        // remove one bit of the hash, leaving more space for the distance info.
+        // This is extremely fast because we can operate on 8 bytes at once.
+        ++mInfoHashShift;
+        auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1);
+
+        for (size_t i = 0; i < numElementsWithBuffer; i += 8) {
+            auto val = unaligned_load<uint64_t>(mInfo + i);
+            val = (val >> 1U) & UINT64_C(0x7f7f7f7f7f7f7f7f);
+            std::memcpy(mInfo + i, &val, sizeof(val));
+        }
+        // update sentinel, which might have been cleared out!
+        mInfo[numElementsWithBuffer] = 1;
+
+        mMaxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1);
+        return true;
+    }
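The loop in `try_increase_info()` halves every info byte without touching them one at a time: eight bytes are loaded as a single `uint64_t`, shifted right once, and the bit that would leak in from each neighboring byte is masked off. A self-contained demonstration of that exact masked shift:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
    uint8_t info[8] = {0x20, 0x40, 0x81, 0x02, 0x00, 0xFF, 0x10, 0x08};

    uint64_t val;
    std::memcpy(&val, info, sizeof(val));  // stands in for unaligned_load<uint64_t>
    // shift all 8 bytes right by one; the mask clears the high bit of each
    // byte, which would otherwise pick up the low bit of its neighbor
    val = (val >> 1U) & UINT64_C(0x7f7f7f7f7f7f7f7f);
    std::memcpy(info, &val, sizeof(val));

    for (uint8_t b : info) {
        printf("%02x ", b);  // each byte is now its old value / 2
    }
    printf("\n");
    return 0;
}
```

This prints `10 20 40 01 00 7f 08 04`: every byte was halved in one 64-bit operation, which is why shrinking `mInfoInc` is cheap enough to try before a full rehash.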
+    // True if resize was possible, false otherwise
+    bool increase_size() {
+        // nothing allocated yet? just allocate InitialNumElements
+        if (0 == mMask) {
+            initData(InitialNumElements);
+            return true;
+        }
+
+        auto const maxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1);
+        if (mNumElements < maxNumElementsAllowed && try_increase_info()) {
+            return true;
+        }
+
+        ROBIN_HOOD_LOG("mNumElements=" << mNumElements << ", maxNumElementsAllowed="
+                                       << maxNumElementsAllowed << ", load="
+                                       << (static_cast<double>(mNumElements) * 100.0 /
+                                           (static_cast<double>(mMask) + 1)))
+
+        if (mNumElements * 2 < calcMaxNumElementsAllowed(mMask + 1)) {
+            // we have to resize, even though there would still be plenty of space left!
+            // Try to rehash instead. Delete freed memory so we don't steadily increase mem in
+            // case we have to rehash a few times.
+            nextHashMultiplier();
+            rehashPowerOfTwo(mMask + 1, true);
+        } else {
+            // we've reached the capacity of the map, so the hash seems to work well. Keep
+            // using it.
+            rehashPowerOfTwo((mMask + 1) * 2, false);
+        }
+        return true;
+    }
+
+    void nextHashMultiplier() {
+        // adding an *even* number, so that the multiplier will always stay odd. This is necessary
+        // so that the hash stays a mixing function (and thus doesn't have any information loss).
+        mHashMultiplier += UINT64_C(0xc4ceb9fe1a85ec54);
+    }
+
+    void destroy() {
+        if (0 == mMask) {
+            // don't deallocate!
+            return;
+        }
+
+        Destroyer<Self, IsFlat && std::is_trivially_destructible<Node>::value>{}
+            .nodesDoNotDeallocate(*this);
+
+        // This protection against not deleting mMask shouldn't be needed as it's sufficiently
+        // protected with the 0==mMask check, but I have this anyways because g++ 7 otherwise
+        // reports a compile error: attempt to free a non-heap object 'fm'
+        // [-Werror=free-nonheap-object]
+        if (mKeyVals != reinterpret_cast_no_cast_align_warning<Node*>(&mMask)) {
+            ROBIN_HOOD_LOG("std::free")
+            std::free(mKeyVals);
+        }
+    }
+
+    void init() noexcept {
+        mKeyVals = reinterpret_cast_no_cast_align_warning<Node*>(&mMask);
+        mInfo = reinterpret_cast<uint8_t*>(&mMask);
+        mNumElements = 0;
+        mMask = 0;
+        mMaxNumElementsAllowed = 0;
+        mInfoInc = InitialInfoInc;
+        mInfoHashShift = InitialInfoHashShift;
+    }
+
+    // members are sorted so no padding occurs
+    uint64_t mHashMultiplier = UINT64_C(0xc4ceb9fe1a85ec53);                // 8 byte  8
+    Node* mKeyVals = reinterpret_cast_no_cast_align_warning<Node*>(&mMask); // 8 byte 16
+    uint8_t* mInfo = reinterpret_cast<uint8_t*>(&mMask);                    // 8 byte 24
+    size_t mNumElements = 0;                                                // 8 byte 32
+    size_t mMask = 0;                                                       // 8 byte 40
+    size_t mMaxNumElementsAllowed = 0;                                      // 8 byte 48
+    InfoType mInfoInc = InitialInfoInc;                                     // 4 byte 52
+    InfoType mInfoHashShift = InitialInfoHashShift;                         // 4 byte 56
+                                                                // 16 byte 56 if NodeAllocator
+};
+
+} // namespace detail
+
+// map
+
+template <typename Key, typename T, typename Hash = hash<Key>,
+          typename KeyEqual = std::equal_to<Key>, size_t MaxLoadFactor100 = 80>
+using unordered_flat_map = detail::Table<true, MaxLoadFactor100, Key, T, Hash, KeyEqual>;
+
+template <typename Key, typename T, typename Hash = hash<Key>,
+          typename KeyEqual = std::equal_to<Key>, size_t MaxLoadFactor100 = 80>
+using unordered_node_map = detail::Table<false, MaxLoadFactor100, Key, T, Hash, KeyEqual>;
+
+template <typename Key, typename T, typename Hash = hash<Key>,
+          typename KeyEqual = std::equal_to<Key>, size_t MaxLoadFactor100 = 80>
+using unordered_map =
+    detail::Table<sizeof(robin_hood::pair<Key, T>) <= sizeof(size_t) * 6 &&
+                      std::is_nothrow_move_constructible<robin_hood::pair<Key, T>>::value &&
+                      std::is_nothrow_move_assignable<robin_hood::pair<Key, T>>::value,
+                  MaxLoadFactor100, Key, T, Hash, KeyEqual>;
+
+// set
+
+template <typename Key, typename Hash = hash<Key>, typename KeyEqual = std::equal_to<Key>,
+          size_t MaxLoadFactor100 = 80>
+using unordered_flat_set = detail::Table<true, MaxLoadFactor100, Key, void, Hash, KeyEqual>;
+
+template <typename Key, typename Hash = hash<Key>, typename KeyEqual = std::equal_to<Key>,
+          size_t MaxLoadFactor100 = 80>
+using unordered_node_set = detail::Table<false, MaxLoadFactor100, Key, void, Hash, KeyEqual>;
+
+template <typename Key, typename Hash = hash<Key>, typename KeyEqual = std::equal_to<Key>,
+          size_t MaxLoadFactor100 = 80>
+using unordered_set = detail::Table<sizeof(Key) <= sizeof(size_t) * 6 &&
+                                        std::is_nothrow_move_constructible<Key>::value &&
+                                        std::is_nothrow_move_assignable<Key>::value,
+                                    MaxLoadFactor100, Key, void, Hash, KeyEqual>;
+
+} // namespace robin_hood
+
+#endif
diff --git a/src/time-extension-functions.cc b/src/time-extension-functions.cc
index 449d60c9..afe334b0 100644
--- a/src/time-extension-functions.cc
+++ b/src/time-extension-functions.cc
@@ -139,7 +139,7 @@ timeslice(sqlite3_value* time_in, nonstd::optional slice_in_opt)
     auto actual_length
         = sql_strftime(ts.in(), ts.size(), win_start.to_timeval());
 
-    ts.shrink_to(actual_length);
+    ts.resize(actual_length);
     return text_auto_buffer{std::move(ts)};
 }
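A note on the alias block above: the generic `robin_hood::unordered_map` picks the flat layout when the key/value pair is small (at most `sizeof(size_t) * 6` bytes) and nothrow-movable, and falls back to the node-based table otherwise. A small sketch of that dispatch rule; the `Big` payload type is a hypothetical example:

```cpp
#include <cstdio>

#include "robin_hood.h"

struct Big {
    char payload[128];  // exceeds sizeof(size_t) * 6, so the node table is chosen
};

int main()
{
    printf("flat threshold: %zu bytes\n", sizeof(size_t) * 6);
    printf("pair<int,int>: %zu bytes -> flat\n", sizeof(robin_hood::pair<int, int>));
    printf("pair<int,Big>: %zu bytes -> node\n", sizeof(robin_hood::pair<int, Big>));

    robin_hood::unordered_map<int, int> small_map;  // selects the flat layout
    robin_hood::unordered_map<int, Big> big_map;    // selects the node layout
    small_map[1] = 2;
    big_map[1] = Big{};
    return 0;
}
```

The rationale: a flat table moves elements around during backward-shift deletion and rehashing, so large or throwing-move types are cheaper and safer behind a stable node pointer.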
diff --git a/test/test_auto_mem.cc b/test/test_auto_mem.cc
index 2603c631..99c06055 100644
--- a/test/test_auto_mem.cc
+++ b/test/test_auto_mem.cc
@@ -27,6 +27,8 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
+#include <algorithm>
+
 #include <string.h>
 #include "base/auto_mem.hh"
@@ -81,5 +83,20 @@ main(int argc, char* argv[])
     assert(free_count == 3);
     assert(last_free == &md1_val);
 
+    {
+        static const char* msg = "Hello, World!\nGoodbye, World!\nTest";
+
+        auto buf = auto_buffer::from(msg, strlen(msg));
+        auto first_lf = std::find(buf.begin(), buf.end(), '\n');
+        auto last_lf = std::find(buf.rbegin(), buf.rend(), '\n');
+
+        assert(std::distance(buf.begin(), first_lf) == 13);
+        assert(*first_lf == '\n');
+        assert(*last_lf == '\n');
+        auto last_lf_index = std::distance(last_lf, buf.rend()) - 1;
+        auto* last_lf_rchr = strrchr(msg, '\n');
+        assert(last_lf_index == (last_lf_rchr - msg));
+    }
+
     return retval;
 }
diff --git a/test/test_line_buffer2.cc b/test/test_line_buffer2.cc
index 5b071d9d..07b97652 100644
--- a/test/test_line_buffer2.cc
+++ b/test/test_line_buffer2.cc
@@ -79,7 +79,7 @@ main(int argc, char* argv[])
     auto fd = auto_fd(mkstemp(fn_template));
 
     remove(fn_template);
-    auto lb = line_buffer();
+    line_buffer lb;
     write(fd, TEST_DATA, strlen(TEST_DATA));
     lseek(fd, SEEK_SET, 0);
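The new reverse iterators exercised by the test above have a practical use beyond the assertions: scanning backwards for the final newline splits a buffer into complete lines and a trailing partial line that is still being written. A usage sketch, assuming only the `auto_buffer` API shown in this patch (`from`, `begin`/`end`, `rbegin`/`rend`):

```cpp
#include <algorithm>
#include <cstdio>
#include <cstring>

#include "base/auto_mem.hh"

int main()
{
    static const char* chunk = "line one\nline two\npartial";

    auto buf = auto_buffer::from(chunk, strlen(chunk));
    auto last_lf = std::find(buf.rbegin(), buf.rend(), '\n');

    if (last_lf != buf.rend()) {
        // base() points one past the match in forward order, i.e. at the
        // first byte of the trailing partial line
        auto complete_len = std::distance(buf.begin(), last_lf.base());
        auto partial_len = std::distance(last_lf.base(), buf.end());

        // the buffer is not NUL-terminated, so bound every read by length
        printf("complete: %.*s", (int) complete_len, buf.begin());
        printf("partial : %.*s\n", (int) partial_len, last_lf.base());
    }
    return 0;
}
```

Searching from the back is the cheap direction for this job: a reader that polls a growing log file typically appends a small tail, so the last newline is near the end of the buffer.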