Skip to content

Commit

Permalink
Add createFromUtf16 support in Synth Traces
Browse files Browse the repository at this point in the history
Summary:
Original Author: [email protected]
Original Git: 8454740
Original Reviewed By: dannysu
Original Revision: D67070209

Add support for the `createFromUtf16` APIs to SynthTrace, so that calls to
them can be recorded and replayed properly.

Reviewed By: neildhar

Differential Revision: D68599522

fbshipit-source-id: 1b472347ecaaf0f6bcbf8014a08d49a11a1282ed
  • Loading branch information
tsaichien authored and facebook-github-bot committed Jan 24, 2025
1 parent c7abb36 commit a08b71b
Show file tree
Hide file tree
Showing 7 changed files with 169 additions and 72 deletions.
40 changes: 26 additions & 14 deletions API/hermes/SynthTrace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,8 @@ static std::string encodingName(SynthTrace::StringEncodingType encoding) {
return "UTF-8";
case SynthTrace::StringEncodingType::ASCII:
return "ASCII";
case SynthTrace::StringEncodingType::UTF16:
return "UTF-16";
default:
llvm_unreachable("Invalid encoding type encountered.");
}
Expand All @@ -376,27 +378,37 @@ void SynthTrace::CreateStringRecord::toJSONInternal(JSONEmitter &json) const {
Record::toJSONInternal(json);
json.emitKeyValue("objID", objID_);
json.emitKeyValue("encoding", encodingName(encodingType_));
// For UTF-8 Strings, copy the content to a char16 array and emit each byte as
// a code unit. This allows us to reconstruct the exact string byte-for-byte
// during replay.
std::vector<char16_t> char16Vector(
(const unsigned char *)chars_.data(),
(const unsigned char *)chars_.data() + chars_.size());
json.emitKeyValue("chars", llvh::ArrayRef(char16Vector));
if (encodingType_ == StringEncodingType::UTF16) {
json.emitKeyValue(
"chars", llvh::ArrayRef(chars16_.data(), chars16_.size()));
} else {
// For UTF-8 Strings, copy the content to a char16 array and emit each byte
// as a code unit. This allows us to reconstruct the exact string
// byte-for-byte during replay.
std::vector<char16_t> char16Vector(
(const unsigned char *)chars_.data(),
(const unsigned char *)chars_.data() + chars_.size());
json.emitKeyValue("chars", llvh::ArrayRef(char16Vector));
}
}

void SynthTrace::CreatePropNameIDRecord::toJSONInternal(
JSONEmitter &json) const {
Record::toJSONInternal(json);
json.emitKeyValue("objID", propNameID_);
json.emitKeyValue("encoding", encodingName(encodingType_));
// For UTF-8 Strings, copy the content to a char16 array and emit each byte
// as a code unit. This allows us to reconstruct the exact string
// byte-for-byte during replay.
std::vector<char16_t> char16Vector(
(const unsigned char *)chars_.data(),
(const unsigned char *)chars_.data() + chars_.size());
json.emitKeyValue("chars", llvh::ArrayRef(char16Vector));
if (encodingType_ == StringEncodingType::UTF16) {
json.emitKeyValue(
"chars", llvh::ArrayRef(chars16_.data(), chars16_.size()));
} else {
// For UTF-8 Strings, copy the content to a char16 array and emit each byte
// as a code unit. This allows us to reconstruct the exact string
// byte-for-byte during replay.
std::vector<char16_t> char16Vector(
(const unsigned char *)chars_.data(),
(const unsigned char *)chars_.data() + chars_.size());
json.emitKeyValue("chars", llvh::ArrayRef(char16Vector));
}
}

void SynthTrace::CreatePropNameIDWithValueRecord::toJSONInternal(
Expand Down
30 changes: 27 additions & 3 deletions API/hermes/SynthTrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ class SynthTrace {
};

/// Represents the encoding type of a String or PropNameId
enum class StringEncodingType { ASCII, UTF8 };
enum class StringEncodingType { ASCII, UTF8, UTF16 };

/// A TimePoint is a time when some event occurred.
using TimePoint = std::chrono::steady_clock::time_point;
Expand Down Expand Up @@ -566,7 +566,9 @@ class SynthTrace {
/// The string that was passed to Runtime::createStringFromAscii() or
/// Runtime::createStringFromUtf8() when the string was created.
std::string chars_;
/// Whether the string was created from ASCII or UTF8
/// The string that was passed to Runtime::createStringFromUtf16()
std::u16string chars16_;
/// Whether the String was created from ASCII, UTF-8 or UTF-16
StringEncodingType encodingType_;

// General UTF-8.
Expand All @@ -589,6 +591,16 @@ class SynthTrace {
objID_(objID),
chars_(chars, length),
encodingType_(StringEncodingType::ASCII) {}
// UTF-16.
/// Builds a record for a string that was created from UTF-16 data.
/// Copies \p length code units starting at \p chars into chars16_ and tags
/// the record with StringEncodingType::UTF16. chars_ is not named in the
/// initializer list, so it stays default-constructed for this overload.
/// \param time forwarded to the Record base.
/// \param objID stored in objID_.
/// \param chars start of the UTF-16 code units to copy.
/// \param length number of char16_t code units to copy from \p chars.
CreateStringRecord(
TimeSinceStart time,
ObjectID objID,
const char16_t *chars,
size_t length)
: Record(time),
objID_(objID),
chars16_(chars, length),
encodingType_(StringEncodingType::UTF16) {}

void toJSONInternal(::hermes::JSONEmitter &json) const override;
RecordType getType() const override {
Expand All @@ -613,7 +625,9 @@ class SynthTrace {
/// The string that was passed to Runtime::createPropNameIDFromAscii() or
/// Runtime::createPropNameIDFromUtf8().
std::string chars_;
/// Whether the PropNameID was created from ASCII or UTF-8
/// The string that was passed to Runtime::createPropNameIDFromUtf16()
std::u16string chars16_;
/// Whether the PropNameID was created from ASCII, UTF-8, or UTF-16
StringEncodingType encodingType_;

// General UTF-8.
Expand All @@ -636,6 +650,16 @@ class SynthTrace {
propNameID_(propNameID),
chars_(chars, length),
encodingType_(StringEncodingType::ASCII) {}
// UTF-16.
/// Builds a record for a PropNameID that was created from UTF-16 data.
/// Copies \p length code units starting at \p chars into chars16_ and tags
/// the record with StringEncodingType::UTF16. chars_ is not named in the
/// initializer list, so it stays default-constructed for this overload.
/// \param time forwarded to the Record base.
/// \param propNameID stored in propNameID_.
/// \param chars start of the UTF-16 code units to copy.
/// \param length number of char16_t code units to copy from \p chars.
CreatePropNameIDRecord(
TimeSinceStart time,
ObjectID propNameID,
const char16_t *chars,
size_t length)
: Record(time),
propNameID_(propNameID),
chars16_(chars, length),
encodingType_(StringEncodingType::UTF16) {}

void toJSONInternal(::hermes::JSONEmitter &json) const override;
RecordType getType() const override {
Expand Down
99 changes: 66 additions & 33 deletions API/hermes/SynthTraceParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@ using namespace ::hermes::parser;

namespace {

/// Maps the textual encoding name stored in a trace record onto the matching
/// SynthTrace::StringEncodingType.
/// \param encodingStr the encoding name; "ASCII" and "UTF-8" are matched
///   explicitly.
/// \return ASCII or UTF8 for those two names, UTF16 otherwise. The fallback
///   is guarded only by an assert that the name is exactly "UTF-16".
SynthTrace::StringEncodingType getStringEncodingType(
    const std::string &encodingStr) {
  using Encoding = SynthTrace::StringEncodingType;
  const bool isAscii = encodingStr == "ASCII";
  const bool isUtf8 = encodingStr == "UTF-8";
  if (!isAscii && !isUtf8) {
    // Only one other spelling is expected at this point.
    assert(encodingStr == "UTF-16");
    return Encoding::UTF16;
  }
  return isAscii ? Encoding::ASCII : Encoding::UTF8;
}

/// Converts the data in the JSONString \p str into a u16string
std::u16string jsonStringToU16String(
const ::hermes::parser::JSONString &jsonStr) {
Expand Down Expand Up @@ -348,26 +360,34 @@ SynthTrace getTrace(
case RecordType::CreateString: {
auto encoding =
llvh::dyn_cast_or_null<JSONString>(obj->get("encoding"));
bool isAscii = false;
if (encoding->str() == "ASCII") {
isAscii = true;
} else {
assert(encoding->str() == "UTF-8");
}
auto str = llvh::dyn_cast_or_null<JSONString>(obj->get("chars"));
if (isAscii) {
trace.emplace_back<SynthTrace::CreateStringRecord>(
timeFromStart,
objID->getValue(),
str->str().data(),
str->str().size());
} else {
auto utf8Str = jsonStringToU8String(*str);
trace.emplace_back<SynthTrace::CreateStringRecord>(
timeFromStart,
objID->getValue(),
reinterpret_cast<const uint8_t *>(utf8Str.data()),
utf8Str.size());
switch (getStringEncodingType(encoding->str())) {
case SynthTrace::StringEncodingType::ASCII: {
trace.emplace_back<SynthTrace::CreateStringRecord>(
timeFromStart,
objID->getValue(),
str->str().data(),
str->str().size());
break;
}
case SynthTrace::StringEncodingType::UTF8: {
auto utf8Str = jsonStringToU8String(*str);
trace.emplace_back<SynthTrace::CreateStringRecord>(
timeFromStart,
objID->getValue(),
reinterpret_cast<const uint8_t *>(utf8Str.data()),
utf8Str.size());
break;
}
case SynthTrace::StringEncodingType::UTF16: {
auto utf16Str = jsonStringToU16String(*str);
trace.emplace_back<SynthTrace::CreateStringRecord>(
timeFromStart,
objID->getValue(),
utf16Str.data(),
utf16Str.size());
break;
}
}
break;
}
Expand All @@ -376,20 +396,33 @@ SynthTrace getTrace(
auto encoding =
llvh::dyn_cast_or_null<JSONString>(obj->get("encoding"));
auto str = llvh::dyn_cast_or_null<JSONString>(obj->get("chars"));
if (encoding->str() == "ASCII") {
trace.emplace_back<SynthTrace::CreatePropNameIDRecord>(
timeFromStart,
id->getValue(),
str->str().data(),
str->str().size());
} else {
assert(encoding->str() == "UTF-8");
auto utf8Str = jsonStringToU8String(*str);
trace.emplace_back<SynthTrace::CreatePropNameIDRecord>(
timeFromStart,
id->getValue(),
reinterpret_cast<const uint8_t *>(utf8Str.data()),
utf8Str.size());
switch (getStringEncodingType(encoding->str())) {
case SynthTrace::StringEncodingType::ASCII: {
trace.emplace_back<SynthTrace::CreatePropNameIDRecord>(
timeFromStart,
id->getValue(),
str->str().data(),
str->str().size());
break;
}
case SynthTrace::StringEncodingType::UTF8: {
auto utf8Str = jsonStringToU8String(*str);
trace.emplace_back<SynthTrace::CreatePropNameIDRecord>(
timeFromStart,
id->getValue(),
reinterpret_cast<const uint8_t *>(utf8Str.data()),
utf8Str.size());
break;
}
case SynthTrace::StringEncodingType::UTF16: {
auto utf16Str = jsonStringToU16String(*str);
trace.emplace_back<SynthTrace::CreatePropNameIDRecord>(
timeFromStart,
id->getValue(),
utf16Str.data(),
utf16Str.size());
break;
}
}
break;
}
Expand Down
10 changes: 9 additions & 1 deletion API/hermes/TraceInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -823,17 +823,23 @@ void TraceInterpreter::executeRecords() {
case SynthTrace::StringEncodingType::ASCII:
str = String::createFromAscii(
rt_, csr.chars_.data(), csr.chars_.size());
TRACE_EXPECT_EQ(csr.chars_, str.asString(rt_).utf8(rt_));
break;
case SynthTrace::StringEncodingType::UTF8:
str = String::createFromUtf8(
rt_,
reinterpret_cast<const uint8_t *>(csr.chars_.data()),
csr.chars_.size());
TRACE_EXPECT_EQ(csr.chars_, str.asString(rt_).utf8(rt_));
break;
case SynthTrace::StringEncodingType::UTF16:
str = String::createFromUtf16(
rt_, csr.chars16_.data(), csr.chars16_.size());
TRACE_EXPECT_EQ_UTF16(csr.chars16_, str.asString(rt_).utf16(rt_));
break;
default:
llvm_unreachable("No other way to construct String");
}
TRACE_EXPECT_EQ(csr.chars_, str.asString(rt_).utf8(rt_));
addToObjectMap(csr.objID_, std::move(str), currentExecIndex);
break;
}
Expand All @@ -847,6 +853,8 @@ void TraceInterpreter::executeRecords() {
return PropNameID::forAscii(rt_, cpnr.chars_);
case SynthTrace::StringEncodingType::UTF8:
return PropNameID::forUtf8(rt_, cpnr.chars_);
case SynthTrace::StringEncodingType::UTF16:
return PropNameID::forUtf16(rt_, cpnr.chars16_);
}
llvm_unreachable("No other way to construct PropNameID");
}();
Expand Down
18 changes: 18 additions & 0 deletions API/hermes/TracingRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,15 @@ jsi::String TracingRuntime::createStringFromUtf8(
return res;
};

/// Creates a string from UTF-16 data through RD::createStringFromUtf16 and
/// appends a SynthTrace::CreateStringRecord describing the call to trace_.
/// \param utf16 the UTF-16 code units; forwarded to RD and passed to the
///   record.
/// \param length number of code units in \p utf16.
/// \return the string returned by RD::createStringFromUtf16.
jsi::String TracingRuntime::createStringFromUtf16(
    const char16_t *utf16,
    size_t length) {
  jsi::String created = RD::createStringFromUtf16(utf16, length);
  // The record takes its object ID from defObjectID(created), so the string
  // has to exist before the record is appended.
  trace_.emplace_back<SynthTrace::CreateStringRecord>(
      getTimeSinceStart(), defObjectID(created), utf16, length);
  return created;
}

jsi::PropNameID TracingRuntime::createPropNameIDFromAscii(
const char *str,
size_t length) {
Expand All @@ -582,6 +591,15 @@ jsi::PropNameID TracingRuntime::createPropNameIDFromUtf8(
return res;
}

/// Creates a PropNameID from UTF-16 data through
/// RD::createPropNameIDFromUtf16 and appends a
/// SynthTrace::CreatePropNameIDRecord describing the call to trace_.
/// \param utf16 the UTF-16 code units; forwarded to RD and passed to the
///   record.
/// \param length number of code units in \p utf16.
/// \return the PropNameID returned by RD::createPropNameIDFromUtf16.
jsi::PropNameID TracingRuntime::createPropNameIDFromUtf16(
    const char16_t *utf16,
    size_t length) {
  jsi::PropNameID created = RD::createPropNameIDFromUtf16(utf16, length);
  // The record takes its object ID from defObjectID(created), so the
  // PropNameID has to exist before the record is appended.
  trace_.emplace_back<SynthTrace::CreatePropNameIDRecord>(
      getTimeSinceStart(), defObjectID(created), utf16, length);
  return created;
}

std::string TracingRuntime::utf8(const jsi::PropNameID &name) {
std::string res = RD::utf8(name);
trace_.emplace_back<SynthTrace::Utf8Record>(
Expand Down
5 changes: 5 additions & 0 deletions API/hermes/TracingRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,17 @@ class TracingRuntime : public jsi::RuntimeDecorator<jsi::Runtime> {

jsi::String createStringFromAscii(const char *str, size_t length) override;
jsi::String createStringFromUtf8(const uint8_t *utf8, size_t length) override;
jsi::String createStringFromUtf16(const char16_t *utf16, size_t length)
override;
std::string utf8(const jsi::PropNameID &) override;

jsi::PropNameID createPropNameIDFromAscii(const char *str, size_t length)
override;
jsi::PropNameID createPropNameIDFromUtf8(const uint8_t *utf8, size_t length)
override;
jsi::PropNameID createPropNameIDFromUtf16(
const char16_t *utf16,
size_t length) override;
std::string utf8(const jsi::String &) override;

std::u16string utf16(const jsi::PropNameID &) override;
Expand Down
39 changes: 18 additions & 21 deletions unittests/API/SynthTraceTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1489,16 +1489,14 @@ TEST_F(SynthTraceReplayTest, CreateObjectReplay) {
TEST_F(SynthTraceReplayTest, UTF16Replay) {
{
auto &rt = *traceRt;
jsi::String emoji = eval(rt, "'\\ud83d\\udc4d'").getString(rt);
rt.global().setProperty(rt, "emoji", emoji);
// UTF-16 encoding for 👍 is 0xd83d 0xdc4d
jsi::String emoji = jsi::String::createFromUtf16(rt, u"\xd83d\xdc4d");
emoji.utf16(rt);

jsi::String loneHighSurrogate = eval(rt, "'\\ud83d'").getString(rt);
rt.global().setProperty(rt, "loneHighSurrogate", loneHighSurrogate);
jsi::String loneHighSurrogate = jsi::String::createFromUtf16(rt, u"\xd83d");
loneHighSurrogate.utf16(rt);

jsi::String ascii = eval(rt, "'hello'").getString(rt);
rt.global().setProperty(rt, "hello", ascii);
jsi::String ascii = jsi::String::createFromUtf16(rt, u"hello");
ascii.utf16(rt);
}

Expand All @@ -1511,13 +1509,14 @@ TEST_F(SynthTraceReplayTest, GetStringDataReplay) {
{
auto &rt = *traceRt;
auto cb = [](bool ascii, const void *data, size_t num) {};
jsi::String emoji = eval(rt, "'\\ud83d\\udc4d'").getString(rt);
// UTF-16 encoding for 👍 is 0xd83d 0xdc4d
jsi::String emoji = jsi::String::createFromUtf16(rt, u"\xd83d\xdc4d");
emoji.getStringData(rt, cb);

jsi::String loneHighSurrogate = eval(rt, "'\\ud83d'").getString(rt);
jsi::String loneHighSurrogate = jsi::String::createFromUtf16(rt, u"\xd83d");
loneHighSurrogate.getStringData(rt, cb);

jsi::String ascii = eval(rt, "'hello'").getString(rt);
jsi::String ascii = jsi::String::createFromUtf16(rt, u"hello");
ascii.getStringData(rt, cb);
}

Expand All @@ -1528,18 +1527,16 @@ TEST_F(SynthTraceReplayTest, GetPropNameIdDataReplay) {
{
auto &rt = *traceRt;
auto cb = [](bool ascii, const void *data, size_t num) {};
jsi::String emoji = eval(rt, "'\\ud83d\\udc4d'").getString(rt);
auto emojiProp = jsi::PropNameID::forString(rt, emoji);
emojiProp.getPropNameIdData(rt, cb);

jsi::String loneHighSurrogate = eval(rt, "'\\ud83d'").getString(rt);
auto loneHighSurrogateProp =
jsi::PropNameID::forString(rt, loneHighSurrogate);
loneHighSurrogateProp.getPropNameIdData(rt, cb);

jsi::String ascii = eval(rt, "'hello'").getString(rt);
auto asciiProp = jsi::PropNameID::forString(rt, ascii);
asciiProp.getPropNameIdData(rt, cb);
// UTF-16 encoding for 👍 is 0xd83d 0xdc4d
jsi::PropNameID emoji = jsi::PropNameID::forUtf16(rt, u"\xd83d\xdc4d");
emoji.getPropNameIdData(rt, cb);

jsi::PropNameID loneHighSurrogate =
jsi::PropNameID::forUtf16(rt, u"\xd83d");
loneHighSurrogate.getPropNameIdData(rt, cb);

jsi::PropNameID ascii = jsi::PropNameID::forUtf16(rt, u"hello");
ascii.getPropNameIdData(rt, cb);
}

replay();
Expand Down

0 comments on commit a08b71b

Please sign in to comment.