diff --git a/ext/psych/extconf.rb b/ext/psych/extconf.rb index 857f8e68..1e52ce8b 100644 --- a/ext/psych/extconf.rb +++ b/ext/psych/extconf.rb @@ -7,35 +7,75 @@ dir_config 'libyaml' -if enable_config("bundled-libyaml", false) || !(find_header('yaml.h') && find_library('yaml', 'yaml_get_version')) - # Embed libyaml since we could not find it. +$VPATH << "$(srcdir)/yaml" +$INCFLAGS << " -I$(srcdir)/yaml" - $VPATH << "$(srcdir)/yaml" - $INCFLAGS << " -I$(srcdir)/yaml" +$srcs = Dir.glob("#{$srcdir}/{,yaml/}*.c").map {|n| File.basename(n)}.sort - $srcs = Dir.glob("#{$srcdir}/{,yaml/}*.c").map {|n| File.basename(n)}.sort +header = 'yaml/yaml.h' +header = "{$(VPATH)}#{header}" if $nmake +if have_macro("_WIN32") + $CPPFLAGS << " -DYAML_DECLARE_STATIC -DHAVE_CONFIG_H" +end + +have_header 'dlfcn.h' +have_header 'inttypes.h' +have_header 'memory.h' +have_header 'stdint.h' +have_header 'stdlib.h' +have_header 'strings.h' +have_header 'string.h' +have_header 'sys/stat.h' +have_header 'sys/types.h' +have_header 'unistd.h' +have_header 'unicode/ucol.h' + +find_header 'yaml.h' +have_header 'config.h' - header = 'yaml/yaml.h' - header = "{$(VPATH)}#{header}" if $nmake - if have_macro("_WIN32") - $CPPFLAGS << " -DYAML_DECLARE_STATIC -DHAVE_CONFIG_H" +## +# ICU dependency +# + +ldflags = cppflags = nil + +if RbConfig::CONFIG["host_os"] =~ /darwin/ + begin + brew_prefix = `brew --prefix icu4c`.chomp + ldflags = "#{brew_prefix}/lib" + cppflags = "#{brew_prefix}/include" + pkg_conf = "#{brew_prefix}/lib/pkgconfig" + # pkg_config should be less error prone than parsing compiler + # command-line options, but we need to set default ldflags and cppflags + # in case the user doesn't have pkg-config installed + ENV['PKG_CONFIG_PATH'] ||= pkg_conf + rescue end +end - have_header 'dlfcn.h' - have_header 'inttypes.h' - have_header 'memory.h' - have_header 'stdint.h' - have_header 'stdlib.h' - have_header 'strings.h' - have_header 'string.h' - have_header 'sys/stat.h' - have_header 'sys/types.h' 
- have_header 'unistd.h' - - find_header 'yaml.h' - have_header 'config.h' +dir_config 'icu', cppflags, ldflags + +pkg_config("icu-i18n") +pkg_config("icu-io") +pkg_config("icu-uc") + +$CXXFLAGS << ' -std=c++11' unless $CXXFLAGS.include?("-std=") + +unless have_library 'icui18n' and have_header 'unicode/ucnv.h' + STDERR.puts "\n\n" + STDERR.puts "***************************************************************************************" + STDERR.puts "*********** icu required (brew install icu4c or apt-get install libicu-dev) ***********" + STDERR.puts "***************************************************************************************" + exit(1) end +have_library 'z' or abort 'libz missing' +have_library 'icuuc' or abort 'libicuuc missing' +have_library 'icudata' or abort 'libicudata missing' + +$CFLAGS << ' -Wall -funroll-loops' +$CFLAGS << ' -Wextra -O0 -ggdb3' if ENV['DEBUG'] + create_makefile 'psych' do |mk| mk << "YAML_H = #{header}".strip << "\n" end diff --git a/ext/psych/yaml/emitter.c b/ext/psych/yaml/emitter.c index 796294cc..6a5d47fc 100644 --- a/ext/psych/yaml/emitter.c +++ b/ext/psych/yaml/emitter.c @@ -1,6 +1,7 @@ - #include "yaml_private.h" +#include +#include /* * Flush the buffer if needed. */ @@ -416,6 +417,43 @@ yaml_emitter_increase_indent(yaml_emitter_t *emitter, return 1; } +/* + * Check whether the UTF-8 encoded character at string.pointer can be printed unescaped: it passes if it matches the byte-range rules below or if u_isprint() accepts the decoded code point. + */ + +static inline int +yaml_emitter_is_printable(yaml_string_t string) +{ + unsigned char octet; + unsigned int width; + unsigned int value; + + octet = string.pointer[0]; + width = (octet & 0x80) == 0x00 ? 1 : + (octet & 0xE0) == 0xC0 ? 2 : + (octet & 0xF0) == 0xE0 ? 3 : + (octet & 0xF8) == 0xF0 ? 4 : 0; + value = (octet & 0x80) == 0x00 ? octet & 0x7F : + (octet & 0xE0) == 0xC0 ? octet & 0x1F : + (octet & 0xF0) == 0xE0 ? octet & 0x0F : + (octet & 0xF8) == 0xF0 ? 
octet & 0x07 : 0; + for (int k = 1; k < (int)width; k ++) { + octet = string.pointer[k]; + value = (value << 6) + (octet & 0x3F); + } + return (((string).pointer[0] == 0x0A) + || ((string).pointer[0] >= 0x20 && (string).pointer[0] <= 0x7E) + || ((string).pointer[0] == 0xC2 && (string).pointer[1] >= 0xA0) + || ((string).pointer[0] > 0xC2 && (string).pointer[0] < 0xED) + || ((string).pointer[0] == 0xED && (string).pointer[1] < 0xA0) + || ((string).pointer[0] == 0xEE) + || ((string).pointer[0] == 0xEF + && !((string).pointer[1] == 0xBB && (string).pointer[2] == 0xBF) + && !((string).pointer[1] == 0xBF + && ((string).pointer[2] == 0xBE || (string).pointer[2] == 0xBF))) + || u_isprint(value)); +} + /* * State dispatcher. */ @@ -1598,7 +1636,7 @@ yaml_emitter_analyze_scalar(yaml_emitter_t *emitter, } } - if (!IS_PRINTABLE(string) + if (!yaml_emitter_is_printable(string) || (!IS_ASCII(string) && !emitter->unicode)) { special_characters = 1; } @@ -2061,7 +2099,7 @@ yaml_emitter_write_double_quoted_scalar(yaml_emitter_t *emitter, while (string.pointer != string.end) { - if (!IS_PRINTABLE(string) || (!emitter->unicode && !IS_ASCII(string)) + if (!yaml_emitter_is_printable(string) || (!emitter->unicode && !IS_ASCII(string)) || IS_BOM(string) || IS_BREAK(string) || CHECK(string, '"') || CHECK(string, '\\')) { diff --git a/ext/psych/yaml/yaml_private.h b/ext/psych/yaml/yaml_private.h index 266a6bd3..84c7208d 100644 --- a/ext/psych/yaml/yaml_private.h +++ b/ext/psych/yaml/yaml_private.h @@ -258,30 +258,6 @@ yaml_string_join( #define IS_ASCII(string) IS_ASCII_AT((string),0) -/* - * Check if the character can be printed unescaped. - */ - -#define IS_PRINTABLE_AT(string,offset) \ - (((string).pointer[offset] == 0x0A) /* . == #x0A */ \ - || ((string).pointer[offset] >= 0x20 /* #x20 <= . <= #x7E */ \ - && (string).pointer[offset] <= 0x7E) \ - || ((string).pointer[offset] == 0xC2 /* #0xA0 <= . 
<= #xD7FF */ \ - && (string).pointer[offset+1] >= 0xA0) \ - || ((string).pointer[offset] > 0xC2 \ - && (string).pointer[offset] < 0xED) \ - || ((string).pointer[offset] == 0xED \ - && (string).pointer[offset+1] < 0xA0) \ - || ((string).pointer[offset] == 0xEE) \ - || ((string).pointer[offset] == 0xEF /* #xE000 <= . <= #xFFFD */ \ - && !((string).pointer[offset+1] == 0xBB /* && . != #xFEFF */ \ - && (string).pointer[offset+2] == 0xBF) \ - && !((string).pointer[offset+1] == 0xBF \ - && ((string).pointer[offset+2] == 0xBE \ - || (string).pointer[offset+2] == 0xBF)))) - -#define IS_PRINTABLE(string) IS_PRINTABLE_AT((string),0) - /* * Check if the character at the specified position is NUL. */ diff --git a/test/psych/test_string.rb b/test/psych/test_string.rb index 0dc34b30..f9568ef4 100644 --- a/test/psych/test_string.rb +++ b/test/psych/test_string.rb @@ -178,6 +178,16 @@ def test_string_with_base_60 assert_equal '01:03:05', Psych.load(yaml) end + def test_unicode_string + yaml = Psych.dump '😃'.encode('utf-8') + assert_match "😃", yaml + end + + def test_original_issue_unicode_string + yaml = Psych.dump '🇩🇪'.encode('utf-8') + assert_match "🇩🇪", yaml + end + def test_nonascii_string_as_binary string = "hello \x80 world!".dup string.force_encoding 'ascii-8bit'