Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement emoji support #508

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 62 additions & 22 deletions ext/psych/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,75 @@

# Register the mkmf directory options for locating a libyaml installation.
dir_config 'libyaml'

# NOTE(review): from here down the text looks like a rendered diff with removed
# and added lines interleaved — several statements appear twice and this `if`
# has no matching `end` in view. Reconcile against the real file before running.
if enable_config("bundled-libyaml", false) || !(find_header('yaml.h') && find_library('yaml', 'yaml_get_version'))
# Embed libyaml since we could not find it.
# Add the bundled yaml/ directory to the make search path and include path.
$VPATH << "$(srcdir)/yaml"
$INCFLAGS << " -I$(srcdir)/yaml"

$VPATH << "$(srcdir)/yaml"
$INCFLAGS << " -I$(srcdir)/yaml"
# Compile every .c file from the extension directory and its yaml/ subdirectory,
# referenced by base name and in sorted order.
$srcs = Dir.glob("#{$srcdir}/{,yaml/}*.c").map {|n| File.basename(n)}.sort

$srcs = Dir.glob("#{$srcdir}/{,yaml/}*.c").map {|n| File.basename(n)}.sort
# Path of the bundled header, later written to the Makefile as YAML_H; under
# nmake it is wrapped in the {$(VPATH)} search-path form.
header = 'yaml/yaml.h'
header = "{$(VPATH)}#{header}" if $nmake
# When the compiler defines _WIN32, add the static-declaration and config.h defines.
if have_macro("_WIN32")
$CPPFLAGS << " -DYAML_DECLARE_STATIC -DHAVE_CONFIG_H"
end

# Probe for system headers (presumably those the bundled libyaml sources
# reference — confirm against yaml/config.h).
have_header 'dlfcn.h'
have_header 'inttypes.h'
have_header 'memory.h'
have_header 'stdint.h'
have_header 'stdlib.h'
have_header 'strings.h'
have_header 'string.h'
have_header 'sys/stat.h'
have_header 'sys/types.h'
have_header 'unistd.h'
# ICU collation header probe; its result is not checked here.
have_header 'unicode/ucol.h'

find_header 'yaml.h'
have_header 'config.h'

# NOTE(review): repeats the header/_WIN32 setup above; likely the other side of
# the diff. The `if` below is not closed within this span.
header = 'yaml/yaml.h'
header = "{$(VPATH)}#{header}" if $nmake
if have_macro("_WIN32")
$CPPFLAGS << " -DYAML_DECLARE_STATIC -DHAVE_CONFIG_H"
##
# ICU dependency
#

# Default ICU library/include directories handed to `dir_config 'icu'` later.
# Both stay nil unless Homebrew reports a usable icu4c prefix on macOS.
ldflags = cppflags = nil

if RbConfig::CONFIG["host_os"] =~ /darwin/
  begin
    brew_prefix = `brew --prefix icu4c`.chomp
    # Only trust the output when brew exited successfully and printed a path;
    # otherwise we would derive bogus "/lib" and "/include" directories.
    if $?.success? && !brew_prefix.empty?
      ldflags = "#{brew_prefix}/lib"
      cppflags = "#{brew_prefix}/include"
      pkg_conf = "#{brew_prefix}/lib/pkgconfig"
      # pkg_config should be less error prone than parsing compiler
      # commandline options, but we need to set default ldflags and cpp flags
      # in case the user doesn't have pkg-config installed
      ENV['PKG_CONFIG_PATH'] ||= pkg_conf
    end
  rescue SystemCallError
    # Best effort: Homebrew is missing or could not be executed, so keep the
    # nil defaults and let the regular ICU checks report the problem.
  end
end

# NOTE(review): these header probes repeat an identical list earlier in the
# file; this looks like the other side of an interleaved diff — confirm which
# copy is live.
have_header 'dlfcn.h'
have_header 'inttypes.h'
have_header 'memory.h'
have_header 'stdint.h'
have_header 'stdlib.h'
have_header 'strings.h'
have_header 'string.h'
have_header 'sys/stat.h'
have_header 'sys/types.h'
have_header 'unistd.h'

find_header 'yaml.h'
have_header 'config.h'
# Register the ICU directory options, using the cppflags (include dir) and
# ldflags (lib dir) values assigned above as defaults; either may be nil.
dir_config 'icu', cppflags, ldflags

# Ask pkg-config for the ICU i18n, io and uc packages. The results are not
# checked here; the have_library calls below are the actual gate.
pkg_config("icu-i18n")
pkg_config("icu-io")
pkg_config("icu-uc")

# Request C++11 unless some -std= flag has already been supplied.
$CXXFLAGS << ' -std=c++11' unless $CXXFLAGS.include?("-std=")

# Hard requirement: print an install hint to stderr and exit with status 1 when
# either the icui18n library or the unicode/ucnv.h header cannot be found.
unless have_library 'icui18n' and have_header 'unicode/ucnv.h'
STDERR.puts "\n\n"
STDERR.puts "***************************************************************************************"
STDERR.puts "*********** icu required (brew install icu4c or apt-get install libicu-dev) ***********"
STDERR.puts "***************************************************************************************"
exit(1)
end

# Each remaining library is mandatory: `or abort` stops the build with the
# given message when the probe fails.
have_library 'z' or abort 'libz missing'
have_library 'icuuc' or abort 'libicuuc missing'
have_library 'icudata' or abort 'libicudata missing'

# Always enable warnings and loop unrolling; add extra warnings, no
# optimisation and full debug info only when the DEBUG env variable is set.
$CFLAGS << ' -Wall -funroll-loops'
$CFLAGS << ' -Wextra -O0 -ggdb3' if ENV['DEBUG']

# Generate the Makefile for the 'psych' extension and append a YAML_H variable
# holding the header path computed earlier.
create_makefile 'psych' do |mk|
mk << "YAML_H = #{header}".strip << "\n"
end
Expand Down
44 changes: 41 additions & 3 deletions ext/psych/yaml/emitter.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

#include "yaml_private.h"

#include <unicode/utf8.h>
#include <unicode/uchar.h>
/*
* Flush the buffer if needed.
*/
Expand Down Expand Up @@ -416,6 +417,43 @@ yaml_emitter_increase_indent(yaml_emitter_t *emitter,
return 1;
}

/*
 * Check if the UTF-8 encoded character at string.pointer can be written
 * unescaped.
 *
 * Characters up to U+FFFF are judged by the lead/continuation byte table the
 * emitter used before (line feed, #x20-#x7E, #xA0-#xD7FF, #xE000-#xFFFD
 * without the BOM #xFEFF).  Anything that table rejects, in particular every
 * 4-byte sequence such as emoji, is decoded and passed to ICU's u_isprint().
 *
 * Returns 1 if the character is printable, 0 otherwise.  Truncated sequences,
 * invalid lead bytes, overlong 2- and 4-byte encodings and values above
 * U+10FFFF are reported as not printable.
 *
 * Assumes string.end points one past the last byte of the buffer, as the
 * callers' `string.pointer != string.end` loops imply.
 */

static inline int
yaml_emitter_is_printable(yaml_string_t string)
{
    const unsigned char *bytes = string.pointer;
    unsigned char lead = bytes[0];
    unsigned int width;
    unsigned int value;
    unsigned int minimum;
    unsigned int k;

    /* ASCII needs no decoding: line feed plus the visible range. */
    if (lead < 0x80)
        return (lead == 0x0A || (lead >= 0x20 && lead <= 0x7E));

    width = (lead & 0xE0) == 0xC0 ? 2 :
            (lead & 0xF0) == 0xE0 ? 3 :
            (lead & 0xF8) == 0xF0 ? 4 : 0;

    /* A stray continuation byte or an invalid lead byte. */
    if (!width)
        return 0;

    /* Never read continuation bytes that lie beyond the buffer. */
    if (string.end - string.pointer < (int)width)
        return 0;

    /* The byte table previously used for characters up to U+FFFF. */
    if ((lead == 0xC2 && bytes[1] >= 0xA0)
            || (lead > 0xC2 && lead < 0xED)
            || (lead == 0xED && bytes[1] < 0xA0)
            || (lead == 0xEE)
            || (lead == 0xEF
                && !(bytes[1] == 0xBB && bytes[2] == 0xBF)
                && !(bytes[1] == 0xBF
                    && (bytes[2] == 0xBE || bytes[2] == 0xBF))))
        return 1;

    /* Decode the code point only when the table could not decide. */
    value = (width == 2) ? (unsigned int)(lead & 0x1F) :
            (width == 3) ? (unsigned int)(lead & 0x0F) :
                           (unsigned int)(lead & 0x07);
    for (k = 1; k < width; k ++) {
        value = (value << 6) + (bytes[k] & 0x3F);
    }

    /*
     * Reject overlong encodings (e.g. C1 81, which would decode to 'A') and
     * values outside the Unicode range before consulting ICU.
     */
    minimum = (width == 2) ? 0x80 : (width == 3) ? 0x800 : 0x10000;
    if (value < minimum || value > 0x10FFFF)
        return 0;

    return u_isprint(value) ? 1 : 0;
}

/*
* State dispatcher.
*/
Expand Down Expand Up @@ -1598,7 +1636,7 @@ yaml_emitter_analyze_scalar(yaml_emitter_t *emitter,
}
}

if (!IS_PRINTABLE(string)
if (!yaml_emitter_is_printable(string)
|| (!IS_ASCII(string) && !emitter->unicode)) {
special_characters = 1;
}
Expand Down Expand Up @@ -2061,7 +2099,7 @@ yaml_emitter_write_double_quoted_scalar(yaml_emitter_t *emitter,

while (string.pointer != string.end)
{
if (!IS_PRINTABLE(string) || (!emitter->unicode && !IS_ASCII(string))
if (!yaml_emitter_is_printable(string) || (!emitter->unicode && !IS_ASCII(string))
|| IS_BOM(string) || IS_BREAK(string)
|| CHECK(string, '"') || CHECK(string, '\\'))
{
Expand Down
24 changes: 0 additions & 24 deletions ext/psych/yaml/yaml_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -258,30 +258,6 @@ yaml_string_join(

#define IS_ASCII(string) IS_ASCII_AT((string),0)

/*
* Check if the character can be printed unescaped.
*
* The test inspects the UTF-8 bytes at (string).pointer[offset] and, for some
* lead bytes, the one or two bytes that follow; no bounds check is made here.
* Lead bytes 0xF0 and above match none of the clauses, so 4-byte sequences
* are always reported as not printable.
*/

#define IS_PRINTABLE_AT(string,offset) \
(((string).pointer[offset] == 0x0A) /* . == #x0A */ \
|| ((string).pointer[offset] >= 0x20 /* #x20 <= . <= #x7E */ \
&& (string).pointer[offset] <= 0x7E) \
|| ((string).pointer[offset] == 0xC2 /* #0xA0 <= . <= #xD7FF */ \
&& (string).pointer[offset+1] >= 0xA0) \
|| ((string).pointer[offset] > 0xC2 \
&& (string).pointer[offset] < 0xED) \
|| ((string).pointer[offset] == 0xED \
&& (string).pointer[offset+1] < 0xA0) \
|| ((string).pointer[offset] == 0xEE) \
|| ((string).pointer[offset] == 0xEF /* #xE000 <= . <= #xFFFD */ \
&& !((string).pointer[offset+1] == 0xBB /* && . != #xFEFF */ \
&& (string).pointer[offset+2] == 0xBF) \
&& !((string).pointer[offset+1] == 0xBF \
&& ((string).pointer[offset+2] == 0xBE \
|| (string).pointer[offset+2] == 0xBF))))

/* Same check applied to the character at the current position (offset 0). */
#define IS_PRINTABLE(string) IS_PRINTABLE_AT((string),0)

/*
* Check if the character at the specified position is NUL.
*/
Expand Down
10 changes: 10 additions & 0 deletions test/psych/test_string.rb
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,16 @@ def test_string_with_base_60
assert_equal '01:03:05', Psych.load(yaml)
end

# A dumped single emoji must appear literally in the resulting YAML text.
def test_unicode_string
  emoji = '😃'.encode('utf-8')
  dumped = Psych.dump(emoji)
  assert_match "😃", dumped
end

# The flag emoji from the original report (two regional-indicator code points)
# must appear literally in the dumped YAML text.
def test_original_issue_unicode_string
  flag = '🇩🇪'.encode('utf-8')
  dumped = Psych.dump(flag)
  assert_match "🇩🇪", dumped
end

def test_nonascii_string_as_binary
string = "hello \x80 world!".dup
string.force_encoding 'ascii-8bit'
Expand Down