mirror of https://github.com/tstack/lnav
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
408 lines
10 KiB
C++
408 lines
10 KiB
C++
/**
|
|
* Copyright (c) 2014, Timothy Stack
|
|
*
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice, this
|
|
* list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
* * Neither the name of Timothy Stack nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
* @file intern_string.cc
|
|
*/
|
|
|
|
#include <mutex>
|
|
|
|
#include "intern_string.hh"
|
|
|
|
#include <string.h>
|
|
|
|
#include "config.h"
|
|
#include "pcrepp/pcre2pp.hh"
|
|
#include "ww898/cp_utf8.hpp"
|
|
#include "xxHash/xxhash.h"
|
|
|
|
const static int TABLE_SIZE = 4095;
|
|
|
|
struct intern_string::intern_table {
|
|
~intern_table()
|
|
{
|
|
for (auto is : this->it_table) {
|
|
auto curr = is;
|
|
|
|
while (curr != nullptr) {
|
|
auto next = curr->is_next;
|
|
|
|
delete curr;
|
|
curr = next;
|
|
}
|
|
}
|
|
}
|
|
|
|
intern_string* it_table[TABLE_SIZE];
|
|
};
|
|
|
|
intern_table_lifetime
|
|
intern_string::get_table_lifetime()
|
|
{
|
|
static intern_table_lifetime retval = std::make_shared<intern_table>();
|
|
|
|
return retval;
|
|
}
|
|
|
|
unsigned long
|
|
hash_str(const char* str, size_t len)
|
|
{
|
|
return XXH3_64bits(str, len);
|
|
}
|
|
|
|
const intern_string*
|
|
intern_string::lookup(const char* str, ssize_t len) noexcept
|
|
{
|
|
unsigned long h;
|
|
intern_string* curr;
|
|
|
|
if (len == -1) {
|
|
len = strlen(str);
|
|
}
|
|
h = hash_str(str, len) % TABLE_SIZE;
|
|
|
|
{
|
|
static std::mutex table_mutex;
|
|
|
|
std::lock_guard<std::mutex> lk(table_mutex);
|
|
auto tab = get_table_lifetime();
|
|
|
|
curr = tab->it_table[h];
|
|
while (curr != nullptr) {
|
|
if (static_cast<ssize_t>(curr->is_str.size()) == len
|
|
&& strncmp(curr->is_str.c_str(), str, len) == 0)
|
|
{
|
|
return curr;
|
|
}
|
|
curr = curr->is_next;
|
|
}
|
|
|
|
curr = new intern_string(str, len);
|
|
curr->is_next = tab->it_table[h];
|
|
tab->it_table[h] = curr;
|
|
|
|
return curr;
|
|
}
|
|
}
|
|
|
|
const intern_string*
|
|
intern_string::lookup(const string_fragment& sf) noexcept
|
|
{
|
|
return lookup(sf.data(), sf.length());
|
|
}
|
|
|
|
const intern_string*
|
|
intern_string::lookup(const std::string& str) noexcept
|
|
{
|
|
return lookup(str.c_str(), str.size());
|
|
}
|
|
|
|
bool
|
|
intern_string::startswith(const char* prefix) const
|
|
{
|
|
const char* curr = this->is_str.data();
|
|
|
|
while (*prefix != '\0' && *prefix == *curr) {
|
|
prefix += 1;
|
|
curr += 1;
|
|
}
|
|
|
|
return *prefix == '\0';
|
|
}
|
|
|
|
string_fragment
|
|
string_fragment::trim(const char* tokens) const
|
|
{
|
|
string_fragment retval = *this;
|
|
|
|
while (retval.sf_begin < retval.sf_end) {
|
|
bool found = false;
|
|
|
|
for (int lpc = 0; tokens[lpc] != '\0'; lpc++) {
|
|
if (retval.sf_string[retval.sf_begin] == tokens[lpc]) {
|
|
found = true;
|
|
break;
|
|
}
|
|
}
|
|
if (!found) {
|
|
break;
|
|
}
|
|
|
|
retval.sf_begin += 1;
|
|
}
|
|
while (retval.sf_begin < retval.sf_end) {
|
|
bool found = false;
|
|
|
|
for (int lpc = 0; tokens[lpc] != '\0'; lpc++) {
|
|
if (retval.sf_string[retval.sf_end - 1] == tokens[lpc]) {
|
|
found = true;
|
|
break;
|
|
}
|
|
}
|
|
if (!found) {
|
|
break;
|
|
}
|
|
|
|
retval.sf_end -= 1;
|
|
}
|
|
|
|
return retval;
|
|
}
|
|
|
|
string_fragment
|
|
string_fragment::trim() const
|
|
{
|
|
return this->trim(" \t\r\n");
|
|
}
|
|
|
|
nonstd::optional<string_fragment>
|
|
string_fragment::consume_n(int amount) const
|
|
{
|
|
if (amount > this->length()) {
|
|
return nonstd::nullopt;
|
|
}
|
|
|
|
return string_fragment{
|
|
this->sf_string,
|
|
this->sf_begin + amount,
|
|
this->sf_end,
|
|
};
|
|
}
|
|
|
|
string_fragment::split_result
|
|
string_fragment::split_n(int amount) const
|
|
{
|
|
if (amount > this->length()) {
|
|
return nonstd::nullopt;
|
|
}
|
|
|
|
return std::make_pair(
|
|
string_fragment{
|
|
this->sf_string,
|
|
this->sf_begin,
|
|
this->sf_begin + amount,
|
|
},
|
|
string_fragment{
|
|
this->sf_string,
|
|
this->sf_begin + amount,
|
|
this->sf_end,
|
|
});
|
|
}
|
|
|
|
std::vector<string_fragment>
|
|
string_fragment::split_lines() const
|
|
{
|
|
std::vector<string_fragment> retval;
|
|
int start = this->sf_begin;
|
|
|
|
for (auto index = start; index < this->sf_end; index++) {
|
|
if (this->sf_string[index] == '\n') {
|
|
retval.emplace_back(this->sf_string, start, index + 1);
|
|
start = index + 1;
|
|
}
|
|
}
|
|
if (retval.empty() || start < this->sf_end) {
|
|
retval.emplace_back(this->sf_string, start, this->sf_end);
|
|
}
|
|
|
|
return retval;
|
|
}
|
|
|
|
Result<ssize_t, const char*>
|
|
string_fragment::utf8_length() const
|
|
{
|
|
ssize_t retval = 0;
|
|
|
|
for (ssize_t byte_index = this->sf_begin; byte_index < this->sf_end;) {
|
|
auto ch_size = TRY(ww898::utf::utf8::char_size([this, byte_index]() {
|
|
return std::make_pair(this->sf_string[byte_index],
|
|
this->sf_end - byte_index);
|
|
}));
|
|
byte_index += ch_size;
|
|
retval += 1;
|
|
}
|
|
|
|
return Ok(retval);
|
|
}
|
|
|
|
string_fragment::case_style
|
|
string_fragment::detect_text_case_style() const
|
|
{
|
|
static const auto LOWER_RE
|
|
= lnav::pcre2pp::code::from_const(R"(^[^A-Z]+$)");
|
|
static const auto UPPER_RE
|
|
= lnav::pcre2pp::code::from_const(R"(^[^a-z]+$)");
|
|
static const auto CAMEL_RE
|
|
= lnav::pcre2pp::code::from_const(R"(^(?:[A-Z][a-z0-9]+)+$)");
|
|
|
|
if (LOWER_RE.find_in(*this).ignore_error().has_value()) {
|
|
return case_style::lower;
|
|
}
|
|
if (UPPER_RE.find_in(*this).ignore_error().has_value()) {
|
|
return case_style::upper;
|
|
}
|
|
if (CAMEL_RE.find_in(*this).ignore_error().has_value()) {
|
|
return case_style::camel;
|
|
}
|
|
|
|
return case_style::mixed;
|
|
}
|
|
|
|
std::string
|
|
string_fragment::to_string_with_case_style(case_style style) const
|
|
{
|
|
std::string retval;
|
|
|
|
switch (style) {
|
|
case case_style::lower: {
|
|
for (auto ch : *this) {
|
|
retval.append(1, std::tolower(ch));
|
|
}
|
|
break;
|
|
}
|
|
case case_style::upper: {
|
|
for (auto ch : *this) {
|
|
retval.append(1, std::toupper(ch));
|
|
}
|
|
break;
|
|
}
|
|
case case_style::camel: {
|
|
retval = this->to_string();
|
|
if (!this->empty()) {
|
|
retval[0] = toupper(retval[0]);
|
|
}
|
|
break;
|
|
}
|
|
case case_style::mixed: {
|
|
return this->to_string();
|
|
}
|
|
}
|
|
|
|
return retval;
|
|
}
|
|
|
|
std::string
|
|
string_fragment::to_unquoted_string() const
|
|
{
|
|
auto sub_sf = *this;
|
|
|
|
if (sub_sf.startswith("r") || sub_sf.startswith("u")) {
|
|
sub_sf = sub_sf.consume_n(1).value();
|
|
}
|
|
if (sub_sf.length() >= 2
|
|
&& ((sub_sf.startswith("\"") && sub_sf.endswith("\""))
|
|
|| (sub_sf.startswith("'") && sub_sf.endswith("'"))))
|
|
{
|
|
std::string retval;
|
|
|
|
sub_sf.sf_begin += 1;
|
|
sub_sf.sf_end -= 1;
|
|
retval.reserve(this->length());
|
|
|
|
auto in_escape = false;
|
|
for (auto ch : sub_sf) {
|
|
if (in_escape) {
|
|
switch (ch) {
|
|
case 'n':
|
|
retval.push_back('\n');
|
|
break;
|
|
case 't':
|
|
retval.push_back('\t');
|
|
break;
|
|
case 'r':
|
|
retval.push_back('\r');
|
|
break;
|
|
default:
|
|
retval.push_back(ch);
|
|
break;
|
|
}
|
|
in_escape = false;
|
|
} else if (ch == '\\') {
|
|
in_escape = true;
|
|
} else {
|
|
retval.push_back(ch);
|
|
}
|
|
}
|
|
|
|
return retval;
|
|
}
|
|
|
|
return this->to_string();
|
|
}
|
|
|
|
uint32_t
|
|
string_fragment::front_codepoint() const
|
|
{
|
|
size_t index = 0;
|
|
auto read_res = ww898::utf::utf8::read(
|
|
[this, &index]() { return this->data()[index++]; });
|
|
if (read_res.isErr()) {
|
|
return this->data()[0];
|
|
}
|
|
return read_res.unwrap();
|
|
}
|
|
|
|
Result<ssize_t, const char*>
|
|
string_fragment::codepoint_to_byte_index(ssize_t cp_index) const
|
|
{
|
|
ssize_t retval = 0;
|
|
|
|
while (cp_index > 0) {
|
|
if (retval >= this->length()) {
|
|
return Err("index is beyond the end of the string");
|
|
}
|
|
auto ch_len = TRY(ww898::utf::utf8::char_size([this, retval]() {
|
|
return std::make_pair(this->data()[retval],
|
|
this->length() - retval - 1);
|
|
}));
|
|
|
|
retval += ch_len;
|
|
cp_index -= 1;
|
|
}
|
|
|
|
return Ok(retval);
|
|
}
|
|
|
|
size_t
|
|
string_fragment::column_width() const
|
|
{
|
|
auto index = this->sf_begin;
|
|
size_t retval = 0;
|
|
|
|
while (index < this->sf_end) {
|
|
auto read_res = ww898::utf::utf8::read(
|
|
[this, &index]() { return this->sf_string[index++]; });
|
|
if (read_res.isErr()) {
|
|
retval += 1;
|
|
} else {
|
|
retval += wcwidth(read_res.unwrap());
|
|
}
|
|
}
|
|
|
|
return retval;
|
|
}
|