@@ -553,135 +553,125 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
553553 * disambiguate it against other similar methods of this class.
554554 */
555555 private String (Charset charset , byte [] bytes , int offset , int length ) {
556+ String str ;
556557 if (length == 0 ) {
557- this .value = "" .value ;
558- this .coder = "" .coder ;
558+ str = "" ;
559559 } else if (charset == UTF_8 .INSTANCE ) {
560- if (COMPACT_STRINGS ) {
561- int dp = StringCoding .countPositives (bytes , offset , length );
562- if (dp == length ) {
563- this .value = Arrays .copyOfRange (bytes , offset , offset + length );
564- this .coder = LATIN1 ;
565- return ;
560+ str = utf8 (bytes , offset , length );
561+ } else if (charset == ISO_8859_1 .INSTANCE ) {
562+ str = iso88591 (bytes , offset , length );
563+ } else if (charset == US_ASCII .INSTANCE ) {
564+ str = ascii (bytes , offset , length );
565+ } else {
566+ str = decode (charset , bytes , offset , length );
567+ }
568+ this (str );
569+ }
570+
571+ private static String utf8 (byte [] bytes , int offset , int length ) {
572+ if (COMPACT_STRINGS ) {
573+ int dp = StringCoding .countPositives (bytes , offset , length );
574+ if (dp == length ) {
575+ return new String (Arrays .copyOfRange (bytes , offset , offset + length ), LATIN1 );
576+ }
577+ // Decode with a stable copy, to be the result if the decoded length is the same
578+ byte [] latin1 = Arrays .copyOfRange (bytes , offset , offset + length );
579+ int sp = dp ; // first dp bytes are already in the copy
580+ while (sp < length ) {
581+ int b1 = latin1 [sp ++];
582+ if (b1 >= 0 ) {
583+ latin1 [dp ++] = (byte ) b1 ;
584+ continue ;
566585 }
567- // Decode with a stable copy, to be the result if the decoded length is the same
568- byte [] latin1 = Arrays .copyOfRange (bytes , offset , offset + length );
569- int sp = dp ; // first dp bytes are already in the copy
570- while (sp < length ) {
571- int b1 = latin1 [sp ++];
572- if (b1 >= 0 ) {
573- latin1 [dp ++] = (byte )b1 ;
586+ if ((b1 & 0xfe ) == 0xc2 && sp < length ) { // b1 either 0xc2 or 0xc3
587+ int b2 = latin1 [sp ];
588+ if (b2 < -64 ) { // continuation bytes are always negative values in the range -128 to -65
589+ latin1 [dp ++] = (byte ) decode2 (b1 , b2 );
590+ sp ++;
574591 continue ;
575592 }
576- if ((b1 & 0xfe ) == 0xc2 && sp < length ) { // b1 either 0xc2 or 0xc3
577- int b2 = latin1 [sp ];
578- if (b2 < -64 ) { // continuation bytes are always negative values in the range -128 to -65
579- latin1 [dp ++] = (byte )decode2 (b1 , b2 );
580- sp ++;
581- continue ;
582- }
583- }
584- // anything not a latin1, including the REPL
585- // we have to go with the utf16
586- sp --;
587- break ;
588- }
589- if (sp == length ) {
590- if (dp != latin1 .length ) {
591- latin1 = Arrays .copyOf (latin1 , dp );
592- }
593- this .value = latin1 ;
594- this .coder = LATIN1 ;
595- return ;
596- }
597- byte [] utf16 = StringUTF16 .newBytesFor (length );
598- StringLatin1 .inflate (latin1 , 0 , utf16 , 0 , dp );
599- dp = decodeUTF8_UTF16 (latin1 , sp , length , utf16 , dp , true );
600- if (dp != length ) {
601- utf16 = Arrays .copyOf (utf16 , dp << 1 );
602593 }
603- this .value = utf16 ;
604- this .coder = UTF16 ;
605- } else { // !COMPACT_STRINGS
606- byte [] dst = StringUTF16 .newBytesFor (length );
607- int dp = decodeUTF8_UTF16 (bytes , offset , offset + length , dst , 0 , true );
608- if (dp != length ) {
609- dst = Arrays .copyOf (dst , dp << 1 );
594+ // anything not a latin1, including the REPL
595+ // we have to go with the utf16
596+ sp --;
597+ break ;
598+ }
599+ if (sp == length ) {
600+ if (dp != latin1 .length ) {
601+ latin1 = Arrays .copyOf (latin1 , dp );
610602 }
611- this .value = dst ;
612- this .coder = UTF16 ;
603+ return new String (latin1 , LATIN1 );
613604 }
614- } else if (charset == ISO_8859_1 .INSTANCE ) {
615- if (COMPACT_STRINGS ) {
616- this .value = Arrays .copyOfRange (bytes , offset , offset + length );
617- this .coder = LATIN1 ;
618- } else {
619- this .value = StringLatin1 .inflate (bytes , offset , length );
620- this .coder = UTF16 ;
605+ byte [] utf16 = StringUTF16 .newBytesFor (length );
606+ StringLatin1 .inflate (latin1 , 0 , utf16 , 0 , dp );
607+ dp = decodeUTF8_UTF16 (latin1 , sp , length , utf16 , dp , true );
608+ if (dp != length ) {
609+ utf16 = Arrays .copyOf (utf16 , dp << 1 );
621610 }
622- } else if (charset == US_ASCII .INSTANCE ) {
623- if (COMPACT_STRINGS && !StringCoding .hasNegatives (bytes , offset , length )) {
624- this .value = Arrays .copyOfRange (bytes , offset , offset + length );
625- this .coder = LATIN1 ;
626- } else {
627- byte [] dst = StringUTF16 .newBytesFor (length );
628- int dp = 0 ;
629- while (dp < length ) {
630- int b = bytes [offset ++];
631- StringUTF16 .putChar (dst , dp ++, (b >= 0 ) ? (char ) b : REPL );
632- }
633- this .value = dst ;
634- this .coder = UTF16 ;
611+ return new String (utf16 , UTF16 );
612+ } else { // !COMPACT_STRINGS
613+ byte [] dst = StringUTF16 .newBytesFor (length );
614+ int dp = decodeUTF8_UTF16 (bytes , offset , offset + length , dst , 0 , true );
615+ if (dp != length ) {
616+ dst = Arrays .copyOf (dst , dp << 1 );
635617 }
618+ return new String (dst , UTF16 );
619+ }
620+ }
621+
622+ private static String iso88591 (byte [] bytes , int offset , int length ) {
623+ if (COMPACT_STRINGS ) {
624+ return new String (Arrays .copyOfRange (bytes , offset , offset + length ), LATIN1 );
636625 } else {
637- // (1)We never cache the "external" cs, the only benefit of creating
638- // an additional StringDe/Encoder object to wrap it is to share the
639- // de/encode() method. These SD/E objects are short-lived, the young-gen
640- // gc should be able to take care of them well. But the best approach
641- // is still not to generate them if not really necessary.
642- // (2)The defensive copy of the input byte/char[] has a big performance
643- // impact, as well as the outgoing result byte/char[]. Need to do the
644- // optimization check of (sm==null && classLoader0==null) for both.
645- CharsetDecoder cd = charset .newDecoder ();
646- // ArrayDecoder fastpaths
647- if (cd instanceof ArrayDecoder ad ) {
648- // ascii
649- if (ad .isASCIICompatible () && !StringCoding .hasNegatives (bytes , offset , length )) {
650- if (COMPACT_STRINGS ) {
651- this .value = Arrays .copyOfRange (bytes , offset , offset + length );
652- this .coder = LATIN1 ;
653- return ;
654- }
655- this .value = StringLatin1 .inflate (bytes , offset , length );
656- this .coder = UTF16 ;
657- return ;
658- }
626+ return new String (StringLatin1 .inflate (bytes , offset , length ), UTF16 );
627+ }
628+ }
629+
630+ private static String ascii (byte [] bytes , int offset , int length ) {
631+ if (COMPACT_STRINGS && !StringCoding .hasNegatives (bytes , offset , length )) {
632+ return new String (Arrays .copyOfRange (bytes , offset , offset + length ), LATIN1 );
633+ } else {
634+ byte [] dst = StringUTF16 .newBytesFor (length );
635+ int dp = 0 ;
636+ while (dp < length ) {
637+ int b = bytes [offset ++];
638+ StringUTF16 .putChar (dst , dp ++, (b >= 0 ) ? (char ) b : REPL );
639+ }
640+ return new String (dst , UTF16 );
641+ }
642+ }
659643
644+ private static String decode (Charset charset , byte [] bytes , int offset , int length ) {
645+ // (1)We never cache the "external" cs, the only benefit of creating
646+ // an additional StringDe/Encoder object to wrap it is to share the
647+ // de/encode() method. These SD/E objects are short-lived, the young-gen
648+ // gc should be able to take care of them well. But the best approach
649+ // is still not to generate them if not really necessary.
650+ // (2)The defensive copy of the input byte/char[] has a big performance
651+ // impact, as well as the outgoing result byte/char[]. Need to do the
652+ // optimization check of (sm==null && classLoader0==null) for both.
653+ CharsetDecoder cd = charset .newDecoder ();
654+ // ArrayDecoder fastpaths
655+ if (cd instanceof ArrayDecoder ad ) {
656+ // ascii
657+ if (ad .isASCIICompatible () && !StringCoding .hasNegatives (bytes , offset , length )) {
658+ return iso88591 (bytes , offset , length );
659+ } else {
660660 // fastpath for always Latin1 decodable single byte
661661 if (COMPACT_STRINGS && ad .isLatin1Decodable ()) {
662662 byte [] dst = new byte [length ];
663663 ad .decodeToLatin1 (bytes , offset , length , dst );
664- this .value = dst ;
665- this .coder = LATIN1 ;
666- return ;
667- }
668-
669- int en = scale (length , cd .maxCharsPerByte ());
670- cd .onMalformedInput (CodingErrorAction .REPLACE )
671- .onUnmappableCharacter (CodingErrorAction .REPLACE );
672- char [] ca = new char [en ];
673- int clen = ad .decode (bytes , offset , length , ca );
674- if (COMPACT_STRINGS ) {
675- byte [] val = StringUTF16 .compress (ca , 0 , clen );;
676- this .coder = StringUTF16 .coderFromArrayLen (val , clen );
677- this .value = val ;
678- return ;
664+ return new String (dst , LATIN1 );
665+ } else {
666+ int en = scale (length , cd .maxCharsPerByte ());
667+ cd .onMalformedInput (CodingErrorAction .REPLACE )
668+ .onUnmappableCharacter (CodingErrorAction .REPLACE );
669+ char [] ca = new char [en ];
670+ int clen = ad .decode (bytes , offset , length , ca );
671+ return new String (ca , 0 , clen , null );
679672 }
680- coder = UTF16 ;
681- value = StringUTF16 .toBytes (ca , 0 , clen );
682- return ;
683673 }
684-
674+ } else {
685675 // decode using CharsetDecoder
686676 int en = scale (length , cd .maxCharsPerByte ());
687677 cd .onMalformedInput (CodingErrorAction .REPLACE )
@@ -694,14 +684,7 @@ private String(Charset charset, byte[] bytes, int offset, int length) {
694684 // Substitution is enabled, so this shouldn't happen
695685 throw new Error (x );
696686 }
697- if (COMPACT_STRINGS ) {
698- byte [] val = StringUTF16 .compress (ca , 0 , caLen );
699- this .coder = StringUTF16 .coderFromArrayLen (val , caLen );
700- this .value = val ;
701- return ;
702- }
703- coder = UTF16 ;
704- value = StringUTF16 .toBytes (ca , 0 , caLen );
687+ return new String (ca , 0 , caLen , null );
705688 }
706689 }
707690
0 commit comments