mirror of https://github.com/tstack/lnav
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
454 lines
13 KiB
C++
454 lines
13 KiB
C++
/**
 * Copyright (c) 2007-2012, Timothy Stack
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 * * Neither the name of Timothy Stack nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * @file pcrepp.cc
 */
|
|
|
|
#include "pcrepp.hh"
|
|
|
|
/** Starting size, in bytes, requested for the PCRE JIT stack. */
constexpr int JIT_STACK_MIN_SIZE = 32 * 1024;
/** Upper bound, in bytes, that the PCRE JIT stack may grow to. */
constexpr int JIT_STACK_MAX_SIZE = 512 * 1024;
|
|
|
|
/**
 * Look up the capture slot that belongs to a named group.
 *
 * @param name The name of the group to look up.
 * @return A pointer into the capture array for the named group, or nullptr
 *   when the lookup reports PCRE_ERROR_NOSUBSTRING.
 */
pcre_context::capture_t*
pcre_context::operator[](const char* name) const
{
    const auto name_slot = this->pc_pcre->name_index(name);

    if (name_slot == PCRE_ERROR_NOSUBSTRING) {
        return nullptr;
    }

    // The lookup result is offset by one when indexing the capture array;
    // first_valid() likewise treats slot zero as special and starts at one.
    return &this->pc_captures[name_slot + 1];
}
|
|
|
|
/**
 * Find the first capture, skipping slot zero, that reports itself as valid.
 *
 * @return A pointer to the first capture in [1, pc_count) for which
 *   is_valid() is true, or nullptr if there is none.
 */
pcre_context::capture_t*
pcre_context::first_valid() const
{
    int slot = 1;

    while (slot < this->pc_count) {
        auto& candidate = this->pc_captures[slot];

        if (candidate.is_valid()) {
            return &candidate;
        }
        ++slot;
    }

    return nullptr;
}
|
|
|
|
std::string
|
|
pcrepp::quote(const char* unquoted)
|
|
{
|
|
std::string retval;
|
|
|
|
for (int lpc = 0; unquoted[lpc]; lpc++) {
|
|
if (isalnum(unquoted[lpc]) || unquoted[lpc] == '_'
|
|
|| unquoted[lpc] & 0x80)
|
|
{
|
|
retval.push_back(unquoted[lpc]);
|
|
} else {
|
|
retval.push_back('\\');
|
|
retval.push_back(unquoted[lpc]);
|
|
}
|
|
}
|
|
|
|
return retval;
|
|
}
|
|
|
|
/**
 * Compile a pattern into a pcrepp value.
 *
 * PCRE_UTF8 is always added to the supplied options.
 *
 * @param pattern The regular expression source; moved into the result on
 *   success.
 * @param options PCRE compile options.
 * @return Ok with the compiled pcrepp, or Err with the message and pattern
 *   offset reported by pcre_compile().
 */
Result<pcrepp, pcrepp::compile_error>
pcrepp::from_str(std::string pattern, int options)
{
    const char* error_msg = nullptr;
    int error_offset = 0;
    auto* compiled = pcre_compile(pattern.c_str(),
                                  options | PCRE_UTF8,
                                  &error_msg,
                                  &error_offset,
                                  nullptr);

    if (compiled != nullptr) {
        return Ok(pcrepp(std::move(pattern), compiled));
    }

    return Err(compile_error{error_msg, error_offset});
}
|
|
|
|
/**
 * Compile a pattern and wrap the resulting pcrepp in a shared_ptr.
 *
 * PCRE_UTF8 is always added to the supplied options.
 *
 * @param pattern The regular expression source; moved into the result on
 *   success.
 * @param options PCRE compile options.
 * @return Ok with a shared pointer to the compiled pcrepp, or Err with the
 *   message and pattern offset reported by pcre_compile().
 */
Result<std::shared_ptr<pcrepp>, pcrepp::compile_error>
pcrepp::shared_from_str(std::string pattern, int options)
{
    const char* failure_text = nullptr;
    int failure_offset = 0;
    const int compile_flags = options | PCRE_UTF8;
    auto* compiled = pcre_compile(
        pattern.c_str(), compile_flags, &failure_text, &failure_offset, nullptr);

    if (compiled == nullptr) {
        return Err(compile_error{failure_text, failure_offset});
    }

    return Ok(std::make_shared<pcrepp>(std::move(pattern), compiled));
}
|
|
|
|
void
|
|
pcrepp::find_captures(const char* pattern)
|
|
{
|
|
bool in_class = false, in_escape = false, in_literal = false;
|
|
std::vector<pcre_context::capture_t> cap_in_progress;
|
|
|
|
for (int lpc = 0; pattern[lpc]; lpc++) {
|
|
if (in_escape) {
|
|
in_escape = false;
|
|
if (pattern[lpc] == 'Q') {
|
|
in_literal = true;
|
|
}
|
|
} else if (in_class) {
|
|
if (pattern[lpc] == ']') {
|
|
in_class = false;
|
|
}
|
|
if (pattern[lpc] == '\\') {
|
|
in_escape = true;
|
|
}
|
|
} else if (in_literal) {
|
|
if (pattern[lpc] == '\\' && pattern[lpc + 1] == 'E') {
|
|
in_literal = false;
|
|
lpc += 1;
|
|
}
|
|
} else {
|
|
switch (pattern[lpc]) {
|
|
case '\\':
|
|
in_escape = true;
|
|
break;
|
|
case '[':
|
|
in_class = true;
|
|
break;
|
|
case '(':
|
|
cap_in_progress.emplace_back(lpc, lpc);
|
|
break;
|
|
case ')': {
|
|
if (!cap_in_progress.empty()) {
|
|
static const auto DEFINE_SF
|
|
= string_fragment::from_const("(?(DEFINE)");
|
|
|
|
auto& cap = cap_in_progress.back();
|
|
char first = '\0', second = '\0', third = '\0';
|
|
bool is_cap = false;
|
|
|
|
cap.c_end = lpc + 1;
|
|
if (cap.length() >= 2) {
|
|
first = pattern[cap.c_begin + 1];
|
|
}
|
|
if (cap.length() >= 3) {
|
|
second = pattern[cap.c_begin + 2];
|
|
}
|
|
if (cap.length() >= 4) {
|
|
third = pattern[cap.c_begin + 3];
|
|
}
|
|
if (cap.c_begin >= 2) {
|
|
auto poss_define = string_fragment::from_byte_range(
|
|
pattern, cap.c_begin - 2, cap.c_end);
|
|
if (poss_define == DEFINE_SF) {
|
|
cap_in_progress.pop_back();
|
|
continue;
|
|
}
|
|
}
|
|
if (first == '?') {
|
|
if (second == '\'') {
|
|
is_cap = true;
|
|
}
|
|
if (second == '<'
|
|
&& (isalpha(third) || third == '_'))
|
|
{
|
|
is_cap = true;
|
|
}
|
|
if (second == 'P' && third == '<') {
|
|
is_cap = true;
|
|
}
|
|
} else if (first != '*') {
|
|
is_cap = true;
|
|
}
|
|
if (is_cap) {
|
|
this->p_captures.push_back(cap);
|
|
}
|
|
cap_in_progress.pop_back();
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
assert((size_t) this->p_capture_count == this->p_captures.size());
|
|
}
|
|
|
|
bool
|
|
pcrepp::match(pcre_context& pc, pcre_input& pi, int options) const
|
|
{
|
|
int length, startoffset, filtered_options = options;
|
|
int count = pc.get_max_count();
|
|
const char* str;
|
|
int rc;
|
|
|
|
pc.set_pcrepp(this);
|
|
pi.pi_offset = pi.pi_next_offset;
|
|
|
|
str = pi.get_string();
|
|
if (filtered_options & PCRE_ANCHORED) {
|
|
filtered_options &= ~PCRE_ANCHORED;
|
|
str = &str[pi.pi_offset];
|
|
startoffset = 0;
|
|
length = pi.pi_length - pi.pi_offset;
|
|
} else {
|
|
startoffset = pi.pi_offset;
|
|
length = pi.pi_length;
|
|
}
|
|
rc = pcre_exec(this->p_code,
|
|
this->p_code_extra.in(),
|
|
str,
|
|
length,
|
|
startoffset,
|
|
filtered_options,
|
|
(int*) pc.all(),
|
|
count * 2);
|
|
|
|
if (rc < 0) {
|
|
switch (rc) {
|
|
case PCRE_ERROR_NOMATCH:
|
|
break;
|
|
case PCRE_ERROR_PARTIAL:
|
|
pc.set_count(1);
|
|
return true;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
} else if (rc == 0) {
|
|
rc = 0;
|
|
} else if (pc.all()->c_begin == pc.all()->c_end) {
|
|
rc = 0;
|
|
if (pi.pi_next_offset + 1 < pi.pi_length) {
|
|
pi.pi_next_offset += 1;
|
|
}
|
|
} else {
|
|
if (options & PCRE_ANCHORED) {
|
|
for (int lpc = 0; lpc < rc; lpc++) {
|
|
if (pc.all()[lpc].c_begin == -1) {
|
|
continue;
|
|
}
|
|
pc.all()[lpc].c_begin += pi.pi_offset;
|
|
pc.all()[lpc].c_end += pi.pi_offset;
|
|
}
|
|
}
|
|
pi.pi_next_offset = pc.all()->c_end;
|
|
}
|
|
|
|
pc.set_count(rc);
|
|
|
|
return rc > 0;
|
|
}
|
|
|
|
std::string
|
|
pcrepp::replace(const char* str, const char* repl) const
|
|
{
|
|
pcre_context_static<30> pc;
|
|
pcre_input pi(str);
|
|
std::string retval;
|
|
std::string::size_type start = 0;
|
|
|
|
while (pi.pi_offset < pi.pi_length) {
|
|
this->match(pc, pi);
|
|
auto all = pc.all();
|
|
bool in_escape = false;
|
|
|
|
if (pc.get_count() < 0) {
|
|
break;
|
|
}
|
|
|
|
retval.append(str, start, (all->c_begin - start));
|
|
start = all->c_end;
|
|
for (int lpc = 0; repl[lpc]; lpc++) {
|
|
auto ch = repl[lpc];
|
|
|
|
if (in_escape) {
|
|
if (isdigit(ch)) {
|
|
auto capture_index = (ch - '0');
|
|
|
|
if (capture_index < pc.get_count()) {
|
|
retval.append(pi.get_substr_start(&all[capture_index]),
|
|
pi.get_substr_len(&all[capture_index]));
|
|
} else if (capture_index > this->p_capture_count) {
|
|
retval.push_back('\\');
|
|
retval.push_back(ch);
|
|
}
|
|
} else {
|
|
if (ch != '\\') {
|
|
retval.push_back('\\');
|
|
}
|
|
retval.push_back(ch);
|
|
}
|
|
in_escape = false;
|
|
} else {
|
|
switch (ch) {
|
|
case '\\':
|
|
in_escape = true;
|
|
break;
|
|
default:
|
|
retval.push_back(ch);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
retval.append(str, start, std::string::npos);
|
|
|
|
return retval;
|
|
}
|
|
|
|
void
|
|
pcrepp::study()
|
|
{
|
|
const char* errptr;
|
|
|
|
this->p_code_extra = pcre_study(this->p_code,
|
|
#ifdef PCRE_STUDY_JIT_COMPILE
|
|
PCRE_STUDY_JIT_COMPILE,
|
|
#else
|
|
0,
|
|
#endif
|
|
&errptr);
|
|
if (!this->p_code_extra && errptr) {
|
|
// log_error("pcre_study error: %s", errptr);
|
|
}
|
|
if (this->p_code_extra != nullptr) {
|
|
pcre_extra* extra = this->p_code_extra;
|
|
|
|
extra->flags
|
|
|= (PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION);
|
|
extra->match_limit = 10000;
|
|
extra->match_limit_recursion = 500;
|
|
#ifdef PCRE_STUDY_JIT_COMPILE
|
|
// pcre_assign_jit_stack(extra, nullptr, jit_stack());
|
|
#endif
|
|
}
|
|
pcre_fullinfo(
|
|
this->p_code, this->p_code_extra, PCRE_INFO_OPTIONS, &this->p_options);
|
|
pcre_fullinfo(this->p_code,
|
|
this->p_code_extra,
|
|
PCRE_INFO_CAPTURECOUNT,
|
|
&this->p_capture_count);
|
|
pcre_fullinfo(this->p_code,
|
|
this->p_code_extra,
|
|
PCRE_INFO_NAMECOUNT,
|
|
&this->p_named_count);
|
|
pcre_fullinfo(this->p_code,
|
|
this->p_code_extra,
|
|
PCRE_INFO_NAMEENTRYSIZE,
|
|
&this->p_name_len);
|
|
pcre_fullinfo(this->p_code,
|
|
this->p_code_extra,
|
|
PCRE_INFO_NAMETABLE,
|
|
&this->p_named_entries);
|
|
}
|
|
|
|
#ifdef PCRE_STUDY_JIT_COMPILE
|
|
/**
 * Get the JIT stack shared by all callers, allocating it on first use.
 *
 * @return The stack obtained from pcre_jit_stack_alloc(); allocation is
 *   attempted again on a later call if the stored pointer is still null.
 */
pcre_jit_stack*
pcrepp::jit_stack()
{
    static pcre_jit_stack* shared_stack = nullptr;

    if (!shared_stack) {
        shared_stack
            = pcre_jit_stack_alloc(JIT_STACK_MIN_SIZE, JIT_STACK_MAX_SIZE);
    }

    return shared_stack;
}
|
|
|
|
size_t
|
|
pcrepp::match_partial(pcre_input& pi) const
|
|
{
|
|
size_t length = pi.pi_length;
|
|
int rc;
|
|
|
|
do {
|
|
rc = pcre_exec(this->p_code,
|
|
this->p_code_extra.in(),
|
|
pi.get_string(),
|
|
length,
|
|
pi.pi_offset,
|
|
PCRE_PARTIAL,
|
|
nullptr,
|
|
0);
|
|
switch (rc) {
|
|
case 0:
|
|
case PCRE_ERROR_PARTIAL:
|
|
return length;
|
|
}
|
|
if (length > 0) {
|
|
length -= 1;
|
|
}
|
|
} while (length > 0);
|
|
|
|
return length;
|
|
}
|
|
|
|
const char*
|
|
pcrepp::name_for_capture(int index) const
|
|
{
|
|
for (pcre_named_capture::iterator iter = this->named_begin();
|
|
iter != this->named_end();
|
|
++iter)
|
|
{
|
|
if (iter->index() == index) {
|
|
return iter->pnc_name;
|
|
}
|
|
}
|
|
return "";
|
|
}
|
|
|
|
int
|
|
pcrepp::name_index(const char* name) const
|
|
{
|
|
int retval = pcre_get_stringnumber(this->p_code, name);
|
|
|
|
if (retval == PCRE_ERROR_NOSUBSTRING) {
|
|
return retval;
|
|
}
|
|
|
|
return retval - 1;
|
|
}
|
|
|
|
#else
|
|
# warning "pcrejit is not available, search performance will be degraded"
|
|
|
|
void
|
|
pcrepp::pcre_free_study(pcre_extra* extra)
|
|
{
|
|
free(extra);
|
|
}
|
|
#endif
|
|
|
|
void
|
|
pcre_context::capture_t::ltrim(const char* str)
|
|
{
|
|
while (this->c_begin < this->c_end && isspace(str[this->c_begin])) {
|
|
this->c_begin += 1;
|
|
}
|
|
}
|