107107--- Paste new string into original string, won't exceed the length of original string
108108--- @param str string
109109--- @param str2 string
110- --- @param pos number
110+ --- @param pos integer
111111--- @return string
112112--- @nodiscard
113113function STRING .paste (str ,str2 ,pos )
@@ -156,6 +156,80 @@ function STRING.unshift(str)
156156 return unshiftMap [str ] or lower (str )
157157end
158158
-- UTF-8 support library; the module name must be spelled exactly, without surrounding whitespace
local utf8=require'utf8'
160+
--- Encode one code point as a UTF-8 byte string
---
--- Code points up to U+10FFFF are handed to `utf8.char`; larger values (still below 2^31)
--- are encoded by hand using the legacy 4-, 5- and 6-byte forms.
--- Both argument checks go through `assertf`.
--- @param num integer code point, must satisfy 0 <= num < 2^31
--- @return string
--- @nodiscard
function STRING.UTF8(num)
    assertf(type(num)=='number'," Wrong type (%s)",type(num))
    assertf(num>=0 and num<2^31," Out of range (%d)",num)
    if num<=0x10FFFF then
        -- Whole Unicode range, U+10FFFF included
        return utf8.char(num)
    elseif num<2^21 then
        -- 4 bytes: lead byte 11110xxx followed by 3 continuation bytes
        return char(240+floor(num/2^18),128+floor(num/2^12)%2^6,128+floor(num/2^06)%2^6,128+num%2^6)
    elseif num<2^26 then
        -- 5 bytes: lead byte 111110xx followed by 4 continuation bytes
        return char(248+floor(num/2^24),128+floor(num/2^18)%2^6,128+floor(num/2^12)%2^6,128+floor(num/2^06)%2^6,128+num%2^6)
    else
        -- 6 bytes: lead byte 1111110x followed by 5 continuation bytes
        return char(252+floor(num/2^30),128+floor(num/2^24)%2^6,128+floor(num/2^18)%2^6,128+floor(num/2^12)%2^6,128+floor(num/2^06)%2^6,128+num%2^6)
    end
end
177+
--- Get the (unicode) char count of (part of) a utf8 string (alias of `utf8.len`)
--- @param str string a utf8 string
--- @param i? integer start byte (cannot start from middle of a char)
--- @param j? integer end byte
--- @param lax? boolean
--- @return integer? count `nil` when an invalid byte sequence is found
--- @return integer? errPos byte position of the first invalid byte (only on failure)
--- @nodiscard
--- @diagnostic disable-next-line
function STRING.u8len(str,i,j,lax) end
-- The empty stub above only carries the annotations; the real implementation replaces it here
STRING.u8len=utf8[('len')]
187+
--- Build a utf8 string from codepoint numbers (alias of `utf8.char`)
--- @param ... integer
--- @return string
--- @nodiscard
--- @diagnostic disable-next-line
function STRING.u8char(...) end
-- The empty stub above only carries the annotations; the real implementation replaces it here
STRING.u8char=utf8[('char')]
195+
--- Get the codepoint(s) of a utf8 string (alias of `utf8.codepoint`)
--- @param str string
--- @param i? integer start byte (cannot start from middle of a char)
--- @param j? integer end byte
--- @param lax? boolean
--- @return integer ...
--- @nodiscard
--- @diagnostic disable-next-line
function STRING.u8byte(str,i,j,lax) end
-- The empty stub above only carries the annotations; the real implementation replaces it here
STRING.u8byte=utf8[('codepoint')]
206+
--- Get the byte position where the `n`-th character (counting from byte `i`) starts
--- (alias of `utf8.offset`)
--- @param str string
--- @param n integer char count
--- @param i? integer start byte
--- @return integer? p start byte of that character, `nil` if the string has no such character
--- @nodiscard
--- @diagnostic disable-next-line
function STRING.u8offset(str,n,i) end
-- The empty stub above only carries the annotations; the real implementation replaces it here
STRING.u8offset=utf8[('offset')]
216+
--- Get a char iterator function of a utf8 string, similar to `str:gmatch('.')`
--- (alias of `utf8.codes`)
---
--- ```lua
--- for bytePos,codepoint in STRING.u8codes(str) do
---     -- code here
--- end
--- ```
--- @param str string
--- @param lax? boolean
--- @return fun(s:string,p:integer):integer,integer
--- @diagnostic disable-next-line
function STRING.u8codes(str,lax) end
-- The empty stub above only carries the annotations; the real implementation replaces it here
STRING.u8codes=utf8[('codes')]
229+
--- Matches exactly one UTF-8 byte sequence, assuming that the subject is a valid UTF-8 string
---
--- First class: a lead byte (ASCII, or 0xC2-0xFD); second class: any number of continuation bytes.
--- The pattern must not contain stray whitespace, otherwise the character ranges change meaning.
STRING.u8pattern="[\0-\x7F\xC2-\xFD][\x80-\xBF]*"
232+
159233local upperData ,lowerData ,diaData -- Data is filled later in this file
160234
161235--- string.upper with utf8 support, warning: low performance
213287--- Count the number of occurrences of a regex pattern in a string
214288--- @param str string
215289--- @param regex string
216- --- @return number
290+ --- @return integer
217291--- @nodiscard
218292function STRING .count (str ,regex )
219293 local _ ,count = gsub (str ,regex ,' ' )
231305--- end
232306--- ```
233307--- @param str string
234- --- @param keep ? boolean | number Max number of leading spaces to be trimmed: ` nil ` - all ` number ` - as you want ` true ` - same with line#1
308+ --- @param keep ? boolean | integer Max number of leading spaces to be trimmed: ` nil ` - all ` number ` - as you want ` true ` - same with line#1
235309--- @return string
236310--- @nodiscard
237311function STRING .trimIndent (str ,keep )
313387--- Calculate the edit distance between two strings
314388--- @param s1 string
315389--- @param s2 string
316- --- @return number
390+ --- @return integer
317391--- @nodiscard
318392function STRING .editDist (s1 ,s2 ) -- By Copilot
319393 local len1 ,len2 =# s1 ,# s2
389463
390464--- **Warning:** don't support number format like .26, must have digits before the dot, like 0.26
391465--- @param str string
392- --- @return number | nil , string | nil
466+ --- @return number | nil , string | nil
393467--- @nodiscard
394468function STRING .cutUnit (str )
395469 local _s ,_e = find (str ,' ^-?%d+%.?%d*' )
@@ -426,22 +500,6 @@ STRING.base64={} for c in gmatch('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrst
426500 table.insert (STRING .base64 ,c )
427501end
428502
--- Encode one code point as a UTF-8 byte string using plain arithmetic
---
--- Supports the legacy range up to 2^31-1, producing 1 to 6 bytes.
--- Both argument checks go through `assertf`.
--- @param num number integer-valued code point, must satisfy 0 <= num < 2^31
--- @return string
--- @nodiscard
function STRING.UTF8(num)
    assertf(type(num)=='number'," Wrong type (%s)",type(num))
    assertf(num>=0 and num<2^31," Out of range (%d)",num)
    if num<2^7 then
        -- 1 byte: plain ASCII
        return char(num)
    elseif num<2^11 then
        -- 2 bytes: lead byte 110xxxxx
        return char(192+floor(num/2^06),128+num%2^6)
    elseif num<2^16 then
        -- 3 bytes: lead byte 1110xxxx
        return char(224+floor(num/2^12),128+floor(num/2^06)%2^6,128+num%2^6)
    elseif num<2^21 then
        -- 4 bytes: lead byte 11110xxx
        return char(240+floor(num/2^18),128+floor(num/2^12)%2^6,128+floor(num/2^06)%2^6,128+num%2^6)
    elseif num<2^26 then
        -- 5 bytes: lead byte 111110xx
        return char(248+floor(num/2^24),128+floor(num/2^18)%2^6,128+floor(num/2^12)%2^6,128+floor(num/2^06)%2^6,128+num%2^6)
    else
        -- 6 bytes: lead byte 1111110x
        return char(252+floor(num/2^30),128+floor(num/2^24)%2^6,128+floor(num/2^18)%2^6,128+floor(num/2^12)%2^6,128+floor(num/2^06)%2^6,128+num%2^6)
    end
end
444-
445503--- Parse binary number from string
446504--- @param str string
447505--- @return number
484542
485543--- Convert a number to binary string
486544--- @param num number
487- --- @param len ? number
545+ --- @param len ? integer
488546--- @return string
489547--- @nodiscard
490548function STRING .toBin (num ,len )
498556
499557--- Convert a number to octal string
500558--- @param num number
501- --- @param len ? number
559+ --- @param len ? integer
502560--- @return string
503561--- @nodiscard
504562function STRING .toOct (num ,len )
@@ -515,7 +573,7 @@ b16[0]='0'
515573
516574--- Convert an integer to hexadecimal string
517575--- @param num number
518- --- @param len ? number
576+ --- @param len ? integer
519577--- @return string
520578--- @nodiscard
521579function STRING .toHex (num ,len )
587645
588646--- Return 16 byte string. Not powerful hash, just simply protect the original text
589647--- @param text string
590- --- @param seedRange ? number default to 26
591- --- @param seed ? number default to 0
648+ --- @param seedRange ? integer default to 26
649+ --- @param seed ? integer default to 0
592650--- @return string
593651--- @nodiscard
594652function STRING .digezt (text ,seedRange ,seed )
628686
629687--- Cut n bytes off a string
630688--- @param str string
631- --- @param n number
689+ --- @param n integer
632690--- @return string , string #`n` bytes, and the rest of string
633691--- @nodiscard
634692function STRING .readChars (str ,n )
642700--- STRING.simplifyPath('Documents/Project/xxx.lua',3) --> 'Doc/Pro/xxx.lua'
643701--- ```
644702--- @param path string
645- --- @param len ? number default to 1
703+ --- @param len ? integer default to 1
646704--- @return string
647705--- @nodiscard
648706function STRING .simplifyPath (path ,len )
0 commit comments