diff --git a/src/v2/pdf_resources/page_font.h b/src/v2/pdf_resources/page_font.h
index 06ac6150..b4626907 100644
--- a/src/v2/pdf_resources/page_font.h
+++ b/src/v2/pdf_resources/page_font.h
@@ -54,6 +54,7 @@ namespace pdflib
     std::string get_correct_character(uint32_t c);
 
     std::string get_character_from_encoding(uint32_t c);
+    std::string decode_cname(const std::string& name);
 
     void init_encoding();
     void init_subtype();
@@ -569,9 +570,56 @@ namespace pdflib
                          << "; Encoding: " << to_string(encoding)
                          << "; font-name: " << font_name;
 
-	    return notdef;
+            return notdef;
           }
       }
   }
 
+  // Decode glyph names of the form '/Cnnn', where nnn is a decimal
+  // character code, into the character they stand for.
+  // - For 0<=nnn<128 the code is interpreted as standard ASCII.
+  // - For 128<=nnn<256 the font's encoding is used; if it has no entry
+  //   (the lookup yields a "GLYPH<..." placeholder) we fall back to
+  //   ISO-8859-1, i.e. the UTF-8 form of code point nnn.
+  // - Otherwise (other names, nnn>=256) the name is returned unchanged.
+  std::string pdf_resource::decode_cname(const std::string& name)
+  {
+    static const std::regex re_ascii(R"(^\/C(\d+)$)");
+
+    std::smatch m;
+    if(!std::regex_match(name, m, re_ascii))
+      {
+        return name;
+      }
+
+    // Accumulate the digits by hand and stop as soon as the value leaves
+    // the single-byte range: std::stoi would throw std::out_of_range on
+    // an overlong digit run coming from a malformed font.
+    uint32_t code = 0;
+    for(auto itr = m[1].first; itr != m[1].second; ++itr)
+      {
+        code = 10*code + static_cast<uint32_t>(*itr - '0');
+        if(code >= 256)
+          {
+            return name;
+          }
+      }
+
+    if(code < 128)
+      {
+        return std::string(1, static_cast<char>(code));
+      }
+
+    std::string decoded = get_character_from_encoding(code);
+    if(decoded.rfind("GLYPH<", 0) == 0)
+      {
+        // Latin-1 code points coincide with Unicode, so encode nnn directly.
+        std::string tmp;
+        utf8::append(code, std::back_inserter(tmp));
+        return tmp;
+      }
+
+    return decoded;
+  }
+
   void pdf_resource::set(std::string font_key_,
@@ -1829,6 +1877,6 @@ namespace pdflib
                   }
                 else
                   {
-                    diff_numb_to_char[numb] = name;
+                    diff_numb_to_char[numb] = decode_cname(name);
                     LOG_S(WARNING) << "differences["< " << name;
                   }