Skip to content

Commit 3893ab8

Browse files
committed
feat: init
1 parent 812d12a commit 3893ab8

File tree

2,346 files changed

+20150
-31311
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,346 files changed

+20150
-31311
lines changed

.changeset/metal-radios-swim.md

Lines changed: 0 additions & 5 deletions
This file was deleted.

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bindings/binding_core_node/src/bundle.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,6 @@ impl swc_core::bundler::Hook for Hook {
281281
span,
282282
raw: None,
283283
value: file_name.into(),
284-
lone_surrogates: false,
285284
}))),
286285
},
287286
KeyValueProp {

bindings/binding_es_ast_viewer/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
"devDependencies": {
33
"jest": "^29.7.0"
44
}
5-
}
5+
}

crates/hstr/src/lib.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
44
use core::str;
55
use std::{
6+
borrow::Borrow,
67
fmt::{Debug, Display},
78
hash::Hash,
89
mem::{self, forget, transmute, ManuallyDrop},
@@ -104,6 +105,7 @@ pub use wtf8_atom::Wtf8Atom;
104105
/// - Atoms created via the `atom!` macro or `String::into` are stored in the
105106
/// global atom store. By default, these atoms are never deallocated. To clean
106107
/// up unused atoms, call [global_atom_store_gc].
108+
#[repr(transparent)]
107109
pub struct Atom {
108110
// If this Atom is a dynamic one, this is *const Entry
109111
unsafe_data: TaggedValue,
@@ -369,6 +371,19 @@ impl PartialEq<Atom> for str {
369371
}
370372
}
371373

374+
impl Borrow<Wtf8Atom> for Atom {
375+
#[inline(always)]
376+
fn borrow(&self) -> &Wtf8Atom {
377+
// SAFETY:
378+
// 1. Wtf8Atom is #[repr(transparent)] over TaggedValue
379+
// 2. Atom is #[repr(transparent)] over TaggedValue
380+
// 3. hstr::Atom and hstr::Wtf8Atom share the same TaggedValue
381+
const _: () = assert!(std::mem::size_of::<Atom>() == std::mem::size_of::<Wtf8Atom>());
382+
const _: () = assert!(std::mem::align_of::<Atom>() == std::mem::align_of::<Wtf8Atom>());
383+
unsafe { transmute::<&Atom, &Wtf8Atom>(self) }
384+
}
385+
}
386+
372387
/// NOT A PUBLIC API
373388
#[cfg(feature = "rkyv")]
374389
impl rkyv::Archive for Atom {

crates/hstr/src/wtf8/mod.rs

Lines changed: 158 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ use core::{
3535
slice, str,
3636
str::FromStr,
3737
};
38+
use std::ops::Add;
3839

3940
mod not_quite_std;
4041

@@ -68,15 +69,15 @@ impl CodePoint {
6869
///
6970
/// Only use when `value` is known to be less than or equal to 0x10FFFF.
7071
#[inline]
71-
pub unsafe fn from_u32_unchecked(value: u32) -> CodePoint {
72+
pub const unsafe fn from_u32_unchecked(value: u32) -> CodePoint {
7273
CodePoint { value }
7374
}
7475

7576
/// Create a new `CodePoint` if the value is a valid code point.
7677
///
7778
/// Return `None` if `value` is above 0x10FFFF.
7879
#[inline]
79-
pub fn from_u32(value: u32) -> Option<CodePoint> {
80+
pub const fn from_u32(value: u32) -> Option<CodePoint> {
8081
match value {
8182
0..=0x10ffff => Some(CodePoint { value }),
8283
_ => None,
@@ -87,7 +88,7 @@ impl CodePoint {
8788
///
8889
/// Since all Unicode scalar values are code points, this always succeeds.
8990
#[inline]
90-
pub fn from_char(value: char) -> CodePoint {
91+
pub const fn from_char(value: char) -> CodePoint {
9192
CodePoint {
9293
value: value as u32,
9394
}
@@ -118,6 +119,18 @@ impl CodePoint {
118119
pub fn to_char_lossy(&self) -> char {
119120
self.to_char().unwrap_or('\u{FFFD}')
120121
}
122+
123+
/// Return `true` if the code point is in the ASCII range.
124+
#[inline]
125+
pub fn is_ascii(&self) -> bool {
126+
self.value <= 0x7f
127+
}
128+
}
129+
130+
impl PartialEq<char> for CodePoint {
131+
fn eq(&self, other: &char) -> bool {
132+
self.value == *other as u32
133+
}
121134
}
122135

123136
/// An owned, growable string of well-formed WTF-8 data.
@@ -165,6 +178,23 @@ impl FromStr for Wtf8Buf {
165178
}
166179
}
167180

181+
impl fmt::Write for Wtf8Buf {
182+
fn write_str(&mut self, s: &str) -> std::fmt::Result {
183+
self.push_str(s);
184+
Ok(())
185+
}
186+
}
187+
188+
impl Add<&Wtf8> for Wtf8Buf {
189+
type Output = Wtf8Buf;
190+
191+
fn add(self, rhs: &Wtf8) -> Self::Output {
192+
let mut result = self;
193+
result.push_wtf8(rhs);
194+
result
195+
}
196+
}
197+
168198
impl Wtf8Buf {
169199
/// Create a new, empty WTF-8 string.
170200
#[inline]
@@ -313,6 +343,12 @@ impl Wtf8Buf {
313343
self.bytes.truncate(new_len)
314344
}
315345

346+
/// Clear the WTF-8 vector, removing all contents.
347+
#[inline]
348+
pub fn clear(&mut self) {
349+
self.bytes.clear();
350+
}
351+
316352
/// Consume the WTF-8 string and try to convert it to UTF-8.
317353
///
318354
/// This does not copy the data.
@@ -345,6 +381,26 @@ impl Wtf8Buf {
345381
}
346382
}
347383
}
384+
385+
/// Create a [Wtf8Buf] from a WTF-8 encoded byte vector.
386+
///
387+
/// # Safety
388+
///
389+
/// The caller must ensure that `bytes` is a well-formed WTF-8 byte
390+
/// sequence.
391+
///
392+
/// This means that:
393+
/// - All bytes must form valid UTF-8 sequences OR valid surrogate code
394+
/// point encodings
395+
/// - Surrogate code points may appear unpaired and be encoded separately,
396+
/// but if they are paired, the pair must be encoded as a single 4-byte UTF-8
397+
/// sequence. For example, the byte sequence `[0xED, 0xA0, 0x80, 0xED,
398+
/// 0xB0, 0x80]` is not valid WTF-8 because WTF-8 forbids encoding a
399+
/// surrogate pair as two separate 3-byte sequences.
400+
#[inline]
401+
pub unsafe fn from_bytes_unchecked(bytes: Vec<u8>) -> Self {
402+
Self { bytes }
403+
}
348404
}
349405

350406
/// Create a new WTF-8 string from an iterator of code points.
@@ -474,6 +530,12 @@ impl Wtf8 {
474530
self.bytes.is_empty()
475531
}
476532

533+
/// Return `true` if the string contains only ASCII characters.
534+
#[inline]
535+
pub const fn is_ascii(&self) -> bool {
536+
self.bytes.is_ascii()
537+
}
538+
477539
/// Return a slice of the given string for the byte range [`begin`..`end`).
478540
///
479541
/// # Failure
@@ -547,6 +609,34 @@ impl Wtf8 {
547609
}
548610
}
549611

612+
/// Returns `true` if this WTF-8 string contains the given character.
613+
#[inline]
614+
pub fn contains_char(&self, ch: char) -> bool {
615+
let target = CodePoint::from_char(ch);
616+
self.contains(target)
617+
}
618+
619+
/// Returns `true` if this WTF-8 string contains the given code point.
620+
#[inline]
621+
pub fn contains(&self, code_point: CodePoint) -> bool {
622+
self.code_points().any(|cp| cp == code_point)
623+
}
624+
625+
/// Returns `true` if this WTF-8 string starts with the given UTF-8 string.
626+
#[inline]
627+
pub fn starts_with(&self, pattern: &str) -> bool {
628+
if pattern.len() > self.len() {
629+
return false;
630+
}
631+
632+
let pattern_wtf8 = self.slice_to(pattern.len());
633+
if let Some(pattern_str) = pattern_wtf8.as_str() {
634+
pattern_str == pattern
635+
} else {
636+
false
637+
}
638+
}
639+
550640
/// Try to convert the string to UTF-8 and return a `&str` slice.
551641
///
552642
/// Return `None` if the string contains surrogates.
@@ -614,6 +704,46 @@ impl Wtf8 {
614704
}
615705
}
616706

707+
/// Returns the uppercase equivalent of this wtf8 slice, as a new [Wtf8Buf].
708+
#[inline]
709+
pub fn to_uppercase(&self) -> Wtf8Buf {
710+
let mut result = Wtf8Buf::with_capacity(self.len());
711+
for cp in self.code_points() {
712+
if let Some(ch) = cp.to_char() {
713+
for upper_ch in ch.to_uppercase() {
714+
result.push_char(upper_ch);
715+
}
716+
} else {
717+
// Surrogates are known to be in the code point range.
718+
let code_point = unsafe { CodePoint::from_u32_unchecked(cp.to_u32()) };
719+
// Skip the WTF-8 concatenation check,
720+
// since well-formed WTF-8 input never has a lead surrogate directly followed by a trail surrogate
721+
not_quite_std::push_code_point(&mut result, code_point)
722+
}
723+
}
724+
result
725+
}
726+
727+
/// Returns the lowercase equivalent of this wtf8 slice, as a new [Wtf8Buf].
728+
#[inline]
729+
pub fn to_lowercase(&self) -> Wtf8Buf {
730+
let mut result = Wtf8Buf::with_capacity(self.len());
731+
for cp in self.code_points() {
732+
if let Some(ch) = cp.to_char() {
733+
for lower_ch in ch.to_lowercase() {
734+
result.push_char(lower_ch);
735+
}
736+
} else {
737+
// Surrogates are known to be in the code point range.
738+
let code_point = unsafe { CodePoint::from_u32_unchecked(cp.to_u32()) };
739+
// Skip the WTF-8 concatenation check,
740+
// since well-formed WTF-8 input never has a lead surrogate directly followed by a trail surrogate
741+
not_quite_std::push_code_point(&mut result, code_point)
742+
}
743+
}
744+
result
745+
}
746+
617747
/// Create a WTF-8 from a WTF-8 encoded byte slice.
618748
///
619749
/// # Safety
@@ -770,6 +900,24 @@ impl PartialEq<Wtf8Buf> for &Wtf8 {
770900
}
771901
}
772902

903+
impl PartialEq<str> for &Wtf8 {
904+
fn eq(&self, other: &str) -> bool {
905+
match self.as_str() {
906+
Some(s) => s == other,
907+
None => false,
908+
}
909+
}
910+
}
911+
912+
impl PartialEq<&str> for &Wtf8 {
913+
fn eq(&self, other: &&str) -> bool {
914+
match self.as_str() {
915+
Some(s) => s == *other,
916+
None => false,
917+
}
918+
}
919+
}
920+
773921
impl hash::Hash for CodePoint {
774922
#[inline]
775923
fn hash<H: hash::Hasher>(&self, state: &mut H) {
@@ -824,6 +972,13 @@ impl<'a> From<&'a str> for &'a Wtf8 {
824972
}
825973
}
826974

975+
impl<'a> From<Wtf8Buf> for Cow<'a, Wtf8> {
976+
#[inline]
977+
fn from(s: Wtf8Buf) -> Cow<'a, Wtf8> {
978+
Cow::Owned(s)
979+
}
980+
}
981+
827982
#[cfg(test)]
828983
mod tests {
829984
use alloc::{format, vec};

0 commit comments

Comments
 (0)