[ansi_scrubber] minor optimization and remove the str.erase()

Related to #1257
pull/1265/head
Tim Stack 2 months ago
parent 84000f46f7
commit 65634ad9b3

@ -125,12 +125,15 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
static const auto semi_pred = string_fragment::tag1{';'}; static const auto semi_pred = string_fragment::tag1{';'};
const auto& regex = ansi_regex(); const auto& regex = ansi_regex();
int64_t origin_offset = 0;
int last_origin_offset_end = 0;
nonstd::optional<std::string> href; nonstd::optional<std::string> href;
size_t href_start = 0; size_t href_start = 0;
string_attrs_t tmp_sa;
size_t cp_dst = std::string::npos;
size_t cp_start = std::string::npos;
int last_origin_end = 0;
int erased = 0;
replace(str.begin(), str.end(), '\0', ' '); std::replace(str.begin(), str.end(), '\0', ' ');
auto matcher = regex.capture_from(str).into(md); auto matcher = regex.capture_from(str).into(md);
while (true) { while (true) {
auto match_res = matcher.matches(PCRE2_NO_UTF_CHECK); auto match_res = matcher.matches(PCRE2_NO_UTF_CHECK);
@ -146,10 +149,18 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
const auto sf = md[0].value(); const auto sf = md[0].value();
auto bs_index_res = sf.codepoint_to_byte_index(1); auto bs_index_res = sf.codepoint_to_byte_index(1);
if (cp_dst != std::string::npos) {
auto cp_len = sf.sf_begin - cp_start;
memmove(&str[cp_dst], &str[cp_start], cp_len);
cp_dst += cp_len;
} else {
cp_dst = sf.sf_begin;
}
if (sf.length() >= 3 && bs_index_res.isOk() if (sf.length() >= 3 && bs_index_res.isOk()
&& sf[bs_index_res.unwrap()] == '\b') && sf[bs_index_res.unwrap()] == '\b')
{ {
ssize_t fill_index = sf.sf_begin; ssize_t fill_index = cp_dst;
line_range bold_range; line_range bold_range;
line_range ul_range; line_range ul_range;
auto sub_sf = sf; auto sub_sf = sf;
@ -176,8 +187,8 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
if (sa != nullptr && bold_range.is_valid()) { if (sa != nullptr && bold_range.is_valid()) {
shift_string_attrs( shift_string_attrs(
*sa, bold_range.lr_start, -bold_range.length() * 2); *sa, bold_range.lr_start, -bold_range.length() * 2);
sa->emplace_back(bold_range, tmp_sa.emplace_back(bold_range,
VC_STYLE.value(text_attrs{A_BOLD})); VC_STYLE.value(text_attrs{A_BOLD}));
bold_range.clear(); bold_range.clear();
} }
if (ul_range.is_valid()) { if (ul_range.is_valid()) {
@ -197,7 +208,7 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
if (sa != nullptr && ul_range.is_valid()) { if (sa != nullptr && ul_range.is_valid()) {
shift_string_attrs( shift_string_attrs(
*sa, ul_range.lr_start, -ul_range.length() * 2); *sa, ul_range.lr_start, -ul_range.length() * 2);
sa->emplace_back( tmp_sa.emplace_back(
ul_range, VC_STYLE.value(text_attrs{A_UNDERLINE})); ul_range, VC_STYLE.value(text_attrs{A_UNDERLINE}));
ul_range.clear(); ul_range.clear();
} }
@ -223,37 +234,33 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
sub_sf = rhs_pair.second; sub_sf = rhs_pair.second;
} }
auto output_size = fill_index - sf.sf_begin; auto output_size = fill_index - cp_dst;
auto erased_size = sub_sf.sf_begin - fill_index;
if (sa != nullptr && ul_range.is_valid()) { if (sa != nullptr && ul_range.is_valid()) {
shift_string_attrs( shift_string_attrs(
*sa, ul_range.lr_start, -ul_range.length() * 2); *sa, ul_range.lr_start, -ul_range.length() * 2);
sa->emplace_back(ul_range, tmp_sa.emplace_back(ul_range,
VC_STYLE.value(text_attrs{A_UNDERLINE})); VC_STYLE.value(text_attrs{A_UNDERLINE}));
ul_range.clear(); ul_range.clear();
} }
if (sa != nullptr && bold_range.is_valid()) { if (sa != nullptr && bold_range.is_valid()) {
shift_string_attrs( shift_string_attrs(
*sa, bold_range.lr_start, -bold_range.length() * 2); *sa, bold_range.lr_start, -bold_range.length() * 2);
sa->emplace_back(bold_range, tmp_sa.emplace_back(bold_range,
VC_STYLE.value(text_attrs{A_BOLD})); VC_STYLE.value(text_attrs{A_BOLD}));
bold_range.clear(); bold_range.clear();
} }
if (sa != nullptr && output_size > 0) { if (sa != nullptr && output_size > 0 && cp_dst > 0) {
sa->emplace_back(line_range{last_origin_offset_end, tmp_sa.emplace_back(
sf.sf_begin + (int) output_size}, line_range{
SA_ORIGIN_OFFSET.value(origin_offset)); (int) last_origin_end,
} (int) cp_dst + (int) output_size,
},
str.erase(str.begin() + fill_index, str.begin() + sub_sf.sf_begin); SA_ORIGIN_OFFSET.value(erased));
if (!mid_sf.empty()) {
last_origin_offset_end = mid_sf.sf_begin;
} else {
last_origin_offset_end = sf.sf_begin + output_size;
} }
origin_offset += erased_size; last_origin_end = cp_dst + output_size;
matcher.reload_input(str, last_origin_offset_end); cp_dst = fill_index;
cp_start = sub_sf.sf_begin;
erased += sf.length() - output_size;
continue; continue;
} }
@ -275,16 +282,18 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
if (href) { if (href) {
if (sa != nullptr) { if (sa != nullptr) {
sa->emplace_back( tmp_sa.emplace_back(
line_range{(int) href_start, line_range{
(int) str.size()}, (int) href_start,
(int) cp_dst,
},
VC_HYPERLINK.value(href.value())); VC_HYPERLINK.value(href.value()));
} }
href = nonstd::nullopt; href = nonstd::nullopt;
} }
if (!uri.empty()) { if (!uri.empty()) {
href = uri.to_string(); href = uri.to_string();
href_start = sf.sf_begin; href_start = cp_dst;
} }
} }
break; break;
@ -370,6 +379,7 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
has_attrs = true; has_attrs = true;
break; break;
#if 0
case 'C': { case 'C': {
auto spaces_res auto spaces_res
= scn::scan_value<unsigned int>(seq.to_string_view()); = scn::scan_value<unsigned int>(seq.to_string_view());
@ -397,6 +407,7 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
} }
break; break;
} }
#endif
case 'O': { case 'O': {
auto role_res = scn::scan_value<int>(seq.to_string_view()); auto role_res = scn::scan_value<int>(seq.to_string_view());
@ -415,40 +426,54 @@ scrub_ansi_string(std::string& str, string_attrs_t* sa)
} }
} }
if (md[1] || md[3] || md[5]) { if (md[1] || md[3] || md[5]) {
str.erase(str.begin() + sf.sf_begin, str.begin() + sf.sf_end);
if (sa != nullptr) { if (sa != nullptr) {
shift_string_attrs(*sa, sf.sf_begin, -sf.length()); shift_string_attrs(*sa, sf.sf_begin, -sf.length());
if (has_attrs) { if (has_attrs) {
for (auto rit = sa->rbegin(); rit != sa->rend(); rit++) { for (auto rit = tmp_sa.rbegin(); rit != tmp_sa.rend();
rit++)
{
if (rit->sa_range.lr_end != -1) { if (rit->sa_range.lr_end != -1) {
continue; continue;
} }
rit->sa_range.lr_end = sf.sf_begin; rit->sa_range.lr_end = cp_dst;
} }
lr.lr_start = sf.sf_begin; lr.lr_start = cp_dst;
lr.lr_end = -1; lr.lr_end = -1;
if (!attrs.empty()) { if (!attrs.empty()) {
sa->emplace_back(lr, VC_STYLE.value(attrs)); tmp_sa.emplace_back(lr, VC_STYLE.value(attrs));
} }
role | [&lr, &sa](role_t r) { role | [&lr, &tmp_sa](role_t r) {
sa->emplace_back(lr, VC_ROLE.value(r)); tmp_sa.emplace_back(lr, VC_ROLE.value(r));
}; };
} }
sa->emplace_back( if (cp_dst > 0) {
line_range{last_origin_offset_end, sf.sf_begin}, tmp_sa.emplace_back(
SA_ORIGIN_OFFSET.value(origin_offset)); line_range{
last_origin_offset_end = sf.sf_begin; (int) last_origin_end,
origin_offset += sf.length(); (int) cp_dst,
},
SA_ORIGIN_OFFSET.value(erased));
}
last_origin_end = cp_dst;
} }
erased += sf.length();
matcher.reload_input(str, sf.sf_begin);
} }
cp_start = sf.sf_end;
} }
if (sa != nullptr && last_origin_offset_end > 0) { if (cp_dst != std::string::npos) {
sa->emplace_back(line_range{last_origin_offset_end, (int) str.size()}, auto cp_len = str.size() - cp_start;
SA_ORIGIN_OFFSET.value(origin_offset)); memmove(&str[cp_dst], &str[cp_start], cp_len);
cp_dst += cp_len;
str.resize(cp_dst);
}
if (sa != nullptr && last_origin_end > 0 && last_origin_end != str.size()) {
tmp_sa.emplace_back(line_range{(int) last_origin_end, (int) str.size()},
SA_ORIGIN_OFFSET.value(erased));
}
if (sa != nullptr) {
sa->insert(sa->end(), tmp_sa.begin(), tmp_sa.end());
} }
} }

@ -221,6 +221,13 @@ public:
return retval.with_ansi_string("%s", str); return retval.with_ansi_string("%s", str);
} }
/**
 * Convenience factory that builds an attr_line_t from a std::string.
 *
 * A default-constructed attr_line_t is created and the input is handed
 * to its with_ansi_string() member; whatever that call yields is
 * returned to the caller by value.
 *
 * NOTE(review): presumably with_ansi_string() strips ANSI escape
 * sequences from the text and records them as attributes -- confirm
 * against its definition, which is not visible here.
 *
 * @param str The text to pass to with_ansi_string().
 * @return A new attr_line_t produced from the given string.
 */
static inline attr_line_t from_ansi_str(const std::string& str)
{
attr_line_t retval;
// The result of the call (not `retval` by name) is what gets returned,
// mirroring the form used by the sibling overload above.
return retval.with_ansi_string(str);
}
/** @return The string itself. */ /** @return The string itself. */
std::string& get_string() { return this->al_string; } std::string& get_string() { return this->al_string; }

@ -289,12 +289,14 @@ execute_sql(exec_context& ec, const std::string& sql, std::string& alt_msg)
auto um auto um
= lnav::console::user_message::error( = lnav::console::user_message::error(
attr_line_t("unable to compile PRQL: ").append(stmt_al)) attr_line_t("unable to compile PRQL: ").append(stmt_al))
.with_reason((std::string) msg.reason); .with_reason(
attr_line_t::from_ansi_str((std::string) msg.reason));
if (!msg.display.empty()) { if (!msg.display.empty()) {
um.with_note((std::string) msg.display); um.with_note(
attr_line_t::from_ansi_str((std::string) msg.display));
} }
for (const auto& hint : msg.hints) { for (const auto& hint : msg.hints) {
um.with_help(hint.data()); um.with_help(attr_line_t::from_ansi_str((std::string) hint));
break; break;
} }
return Err(um); return Err(um);

@ -899,8 +899,8 @@ metadata::possibility_provider(const std::vector<section_key_t>& path)
namespace fmt { namespace fmt {
auto auto
formatter<lnav::document::section_key_t>::format( formatter<lnav::document::section_key_t>::format(
const lnav::document::section_key_t& key, fmt::format_context& ctx) const lnav::document::section_key_t& key,
-> decltype(ctx.out()) const fmt::format_context& ctx) -> decltype(ctx.out()) const
{ {
return key.match( return key.match(
[this, &ctx](const std::string& str) { [this, &ctx](const std::string& str) {

@ -879,8 +879,13 @@ textfile_sub_source::rescan_files(
auto ms_iter = this->tss_doc_metadata.find(lf->get_filename()); auto ms_iter = this->tss_doc_metadata.find(lf->get_filename());
if (!new_data && ms_iter != this->tss_doc_metadata.end()) { if (!new_data && ms_iter != this->tss_doc_metadata.end()) {
if (st.st_mtime != ms_iter->second.ms_mtime // Only invalidate the meta if the file is small, or we
|| st.st_size != ms_iter->second.ms_file_size) // found some meta previously.
if ((st.st_mtime != ms_iter->second.ms_mtime
|| st.st_size != ms_iter->second.ms_file_size)
&& (st.st_size < 10 * 1024
|| !ms_iter->second.ms_metadata.m_sections_tree
.empty()))
{ {
log_debug( log_debug(
"text file has changed, invalidating metadata. " "text file has changed, invalidating metadata. "

@ -541,6 +541,12 @@ view_curses::mvwattrline(WINDOW* window,
auto desired_fg = fg_color[lpc] != -1 ? fg_color[lpc] : cur_fg; auto desired_fg = fg_color[lpc] != -1 ? fg_color[lpc] : cur_fg;
auto desired_bg = bg_color[lpc] != -1 ? bg_color[lpc] : cur_bg; auto desired_bg = bg_color[lpc] != -1 ? bg_color[lpc] : cur_bg;
if (desired_fg >= COLOR_BLACK && desired_fg <= COLOR_WHITE) {
desired_fg = vc.ansi_to_theme_color(desired_fg);
}
if (desired_bg >= COLOR_BLACK && desired_bg <= COLOR_WHITE) {
desired_bg = vc.ansi_to_theme_color(desired_bg);
}
if (desired_fg == desired_bg) { if (desired_fg == desired_bg) {
if (desired_bg >= 0 if (desired_bg >= 0
&& desired_bg && desired_bg

@ -37,6 +37,7 @@
#include <assert.h> #include <assert.h>
#include "base/ansi_scrubber.hh" #include "base/ansi_scrubber.hh"
#include "base/attr_line.builder.hh"
#include "config.h" #include "config.h"
#include "view_curses.hh" #include "view_curses.hh"
@ -46,6 +47,30 @@ int
main(int argc, char* argv[]) main(int argc, char* argv[])
{ {
printf("BEGIN test\n"); printf("BEGIN test\n");
{
std::string zero_width = "\x16 1 \x16 2 \x16";
string_attrs_t sa;
scrub_ansi_string(zero_width, &sa);
printf("zero width: '%s'\n",
fmt::format(FMT_STRING("{:?}"), zero_width).c_str());
assert(zero_width == " 1 2 ");
for (const auto& attr : sa) {
printf("attr %d:%d %s\n",
attr.sa_range.lr_start,
attr.sa_range.lr_end,
attr.sa_type->sat_name);
if (attr.sa_type == &VC_HYPERLINK) {
printf(" value: %s\n",
attr.sa_value.get<std::string>().c_str());
}
if (attr.sa_type == &SA_ORIGIN_OFFSET) {
printf(" value: %lld\n", attr.sa_value.get<int64_t>());
}
}
}
{ {
std::string bad_bold = "That is not\b\b\ball\n"; std::string bad_bold = "That is not\b\b\ball\n";
string_attrs_t sa; string_attrs_t sa;
@ -109,16 +134,49 @@ main(int argc, char* argv[])
} }
} }
} }
{
string_attrs_t sa;
string str_cp;
str_cp = "Hello, World!";
scrub_ansi_string(str_cp, &sa);
assert(str_cp == "Hello, World!");
assert(sa.empty());
str_cp = "Hello\x1b[44;m, \x1b[33;mWorld\x1b[0;m!";
scrub_ansi_string(str_cp, &sa);
assert(str_cp == "Hello, World!");
printf("%s\n", str_cp.c_str());
for (const auto& attr : sa) {
printf(" attr %d:%d %s %s\n",
attr.sa_range.lr_start,
attr.sa_range.lr_end,
attr.sa_type->sat_name,
string_fragment::from_str_range(
str_cp, attr.sa_range.lr_start, attr.sa_range.lr_end)
.to_string()
.c_str());
}
}
{ {
// "•]8;;http://example.com•\This_is_a_link•]8;;•\_"
auto hlink = std::string( auto hlink = std::string(
"\033]8;;http://example.com\033\\This is a " "\033]8;;http://example.com\033\\This is a "
"link\033]8;;\033\\\n"); "link\033]8;;\033\\\n");
auto al = attr_line_t();
attr_line_builder alb(al);
alb.append_as_hexdump(hlink);
printf("%s\n", al.get_string().c_str());
string_attrs_t sa; string_attrs_t sa;
scrub_ansi_string(hlink, &sa); scrub_ansi_string(hlink, &sa);
printf("hlink %d %d %s", hlink.size(), sa.size(), hlink.c_str()); printf("hlink %d %d %s", hlink.size(), sa.size(), hlink.c_str());
assert(sa.size() == 4); assert(sa.size() == 3);
for (const auto& attr : sa) { for (const auto& attr : sa) {
printf("attr %d:%d %s\n", printf("attr %d:%d %s\n",
attr.sa_range.lr_start, attr.sa_range.lr_start,
@ -128,19 +186,9 @@ main(int argc, char* argv[])
printf(" value: %s\n", printf(" value: %s\n",
attr.sa_value.get<std::string>().c_str()); attr.sa_value.get<std::string>().c_str());
} }
if (attr.sa_type == &SA_ORIGIN_OFFSET) {
printf(" value: %lld\n", attr.sa_value.get<int64_t>());
}
} }
} }
string_attrs_t sa;
string str_cp;
str_cp = "Hello, World!";
scrub_ansi_string(str_cp, &sa);
assert(str_cp == "Hello, World!");
assert(sa.empty());
str_cp = "Hello\x1b[44;m, \x1b[33;mWorld\x1b[0;m!";
scrub_ansi_string(str_cp, &sa);
assert(str_cp == "Hello, World!");
} }

Loading…
Cancel
Save