Skip to content

Commit

Permalink
Add StringEncodingType for String/PropNameId encoding in SynthTrace…
Browse files Browse the repository at this point in the history
… Records

Summary:
Original Author: [email protected]
Original Git: d8127f9
Original Reviewed By: dannysu
Original Revision: D67303131

Adds an enum `StringEncodingType` to describe the encoding type of a
String or PropNameID when it is created. `CreatePropNameIDRecord`
already does something like this. `CreateStringRecord` relies on a flag
to indicate if it's ASCII or UTF-8, which makes it hard to add UTF-16 as
well.

Next diff in stack will add UTF-16 support. This diff is just a small
refactor.

Reviewed By: neildhar

Differential Revision: D68599523

fbshipit-source-id: 11794e19c5a8b273a004c2da149935d7953c63b4
  • Loading branch information
tsaichien authored and facebook-github-bot committed Jan 24, 2025
1 parent a227066 commit c7abb36
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 22 deletions.
15 changes: 11 additions & 4 deletions API/hermes/SynthTrace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,14 +361,21 @@ void SynthTrace::BigIntToStringRecord::toJSONInternal(
json.emitKeyValue("radix", radix_);
}

static std::string encodingName(bool isASCII) {
return isASCII ? "ASCII" : "UTF-8";
/// Maps a StringEncodingType to the name emitted under the "encoding" key of
/// a trace record's JSON.
/// \param encoding the encoding the String or PropNameID was created with.
/// \return "UTF-8" or "ASCII" for the corresponding enumerator.
static std::string encodingName(SynthTrace::StringEncodingType encoding) {
switch (encoding) {
case SynthTrace::StringEncodingType::UTF8:
return "UTF-8";
case SynthTrace::StringEncodingType::ASCII:
return "ASCII";
default:
// Any value other than the two enumerators above is treated as unreachable.
llvm_unreachable("Invalid encoding type encountered.");
}
}

void SynthTrace::CreateStringRecord::toJSONInternal(JSONEmitter &json) const {
Record::toJSONInternal(json);
json.emitKeyValue("objID", objID_);
json.emitKeyValue("encoding", encodingName(ascii_));
json.emitKeyValue("encoding", encodingName(encodingType_));
// For UTF-8 Strings, copy the content to a char16 array and emit each byte as
// a code unit. This allows us to reconstruct the exact string byte-for-byte
// during replay.
Expand All @@ -382,7 +389,7 @@ void SynthTrace::CreatePropNameIDRecord::toJSONInternal(
JSONEmitter &json) const {
Record::toJSONInternal(json);
json.emitKeyValue("objID", propNameID_);
json.emitKeyValue("encoding", encodingName(valueType_ == ASCII));
json.emitKeyValue("encoding", encodingName(encodingType_));
// For UTF-8 Strings, copy the content to a char16 array and emit each byte
// as a code unit. This allows us to reconstruct the exact string
// byte-for-byte during replay.
Expand Down
20 changes: 13 additions & 7 deletions API/hermes/SynthTrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,9 @@ class SynthTrace {
} val_;
};

/// Represents the encoding type of a String or PropNameID at the time it was
/// created. Stored on CreateStringRecord and CreatePropNameIDRecord.
enum class StringEncodingType { ASCII, UTF8 };

/// A TimePoint is a time when some event occurred.
using TimePoint = std::chrono::steady_clock::time_point;
using TimeSinceStart = std::chrono::milliseconds;
Expand Down Expand Up @@ -563,8 +566,8 @@ class SynthTrace {
/// The string that was passed to Runtime::createStringFromAscii() or
/// Runtime::createStringFromUtf8() when the string was created.
std::string chars_;
/// Whether the string was created from ASCII (true) or UTF8 (false).
bool ascii_;
/// Whether the string was created from ASCII or UTF8
StringEncodingType encodingType_;

// General UTF-8.
CreateStringRecord(
Expand All @@ -575,14 +578,17 @@ class SynthTrace {
: Record(time),
objID_(objID),
chars_(reinterpret_cast<const char *>(chars), length),
ascii_(false) {}
encodingType_(StringEncodingType::UTF8) {}
// Ascii.
CreateStringRecord(
TimeSinceStart time,
ObjectID objID,
const char *chars,
size_t length)
: Record(time), objID_(objID), chars_(chars, length), ascii_(true) {}
: Record(time),
objID_(objID),
chars_(chars, length),
encodingType_(StringEncodingType::ASCII) {}

void toJSONInternal(::hermes::JSONEmitter &json) const override;
RecordType getType() const override {
Expand All @@ -608,7 +614,7 @@ class SynthTrace {
/// Runtime::createPropNameIDFromUtf8().
std::string chars_;
/// Whether the PropNameID was created from ASCII or UTF-8
enum ValueType { ASCII, UTF8 } valueType_;
StringEncodingType encodingType_;

// General UTF-8.
CreatePropNameIDRecord(
Expand All @@ -619,7 +625,7 @@ class SynthTrace {
: Record(time),
propNameID_(propNameID),
chars_(reinterpret_cast<const char *>(chars), length),
valueType_(UTF8) {}
encodingType_(StringEncodingType::UTF8) {}
// Ascii.
CreatePropNameIDRecord(
TimeSinceStart time,
Expand All @@ -629,7 +635,7 @@ class SynthTrace {
: Record(time),
propNameID_(propNameID),
chars_(chars, length),
valueType_(ASCII) {}
encodingType_(StringEncodingType::ASCII) {}

void toJSONInternal(::hermes::JSONEmitter &json) const override;
RecordType getType() const override {
Expand Down
27 changes: 16 additions & 11 deletions API/hermes/TraceInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -819,14 +819,19 @@ void TraceInterpreter::executeRecords() {
const auto &csr =
static_cast<const SynthTrace::CreateStringRecord &>(*rec);
Value str;
if (csr.ascii_) {
str = String::createFromAscii(
rt_, csr.chars_.data(), csr.chars_.size());
} else {
str = String::createFromUtf8(
rt_,
reinterpret_cast<const uint8_t *>(csr.chars_.data()),
csr.chars_.size());
switch (csr.encodingType_) {
case SynthTrace::StringEncodingType::ASCII:
str = String::createFromAscii(
rt_, csr.chars_.data(), csr.chars_.size());
break;
case SynthTrace::StringEncodingType::UTF8:
str = String::createFromUtf8(
rt_,
reinterpret_cast<const uint8_t *>(csr.chars_.data()),
csr.chars_.size());
break;
default:
llvm_unreachable("No other way to construct String");
}
TRACE_EXPECT_EQ(csr.chars_, str.asString(rt_).utf8(rt_));
addToObjectMap(csr.objID_, std::move(str), currentExecIndex);
Expand All @@ -837,10 +842,10 @@ void TraceInterpreter::executeRecords() {
static_cast<const SynthTrace::CreatePropNameIDRecord &>(*rec);
// We perform the calls below for their side effects (for example,
jsi::PropNameID propNameID = [&] {
switch (cpnr.valueType_) {
case SynthTrace::CreatePropNameIDRecord::ASCII:
switch (cpnr.encodingType_) {
case SynthTrace::StringEncodingType::ASCII:
return PropNameID::forAscii(rt_, cpnr.chars_);
case SynthTrace::CreatePropNameIDRecord::UTF8:
case SynthTrace::StringEncodingType::UTF8:
return PropNameID::forUtf8(rt_, cpnr.chars_);
}
llvm_unreachable("No other way to construct PropNameID");
Expand Down

0 comments on commit c7abb36

Please sign in to comment.