@@ -917,18 +917,21 @@ private static nuint ZSTD_execSequenceEndSplitLitBuffer(byte* op, byte* oend, by
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  private static nuint ZSTD_execSequence(byte* op, byte* oend, seq_t sequence, byte** litPtr, byte* litLimit, byte* prefixStart, byte* virtualStart, byte* dictEnd)
  {
-     byte* oLitEnd = op + sequence.litLength;
-     nuint sequenceLength = sequence.litLength + sequence.matchLength;
+     var sequence_litLength = sequence.litLength;
+     var sequence_matchLength = sequence.matchLength;
+     var sequence_offset = sequence.offset;
+     byte* oLitEnd = op + sequence_litLength;
+     nuint sequenceLength = sequence_litLength + sequence_matchLength;
      /* risk : address space overflow (32-bits) */
      byte* oMatchEnd = op + sequenceLength;
      /* risk : address space underflow on oend=NULL */
      byte* oend_w = oend - 32;
-     byte* iLitEnd = *litPtr + sequence.litLength;
-     byte* match = oLitEnd - sequence.offset;
+     byte* iLitEnd = *litPtr + sequence_litLength;
+     byte* match = oLitEnd - sequence_offset;
      assert(op != null);
      assert(oend_w < oend);
      if (iLitEnd > litLimit || oMatchEnd > oend_w || MEM_32bits && (nuint)(oend - op) < sequenceLength + 32)
-         return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+         return ZSTD_execSequenceEnd(op, oend, new seq_t { litLength = sequence_litLength, matchLength = sequence_matchLength, offset = sequence_offset }, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
      assert(op <= oLitEnd);
      assert(oLitEnd < oMatchEnd);
      assert(oMatchEnd <= oend);
@@ -937,52 +940,52 @@ private static nuint ZSTD_execSequence(byte* op, byte* oend, seq_t sequence, byt
      assert(oMatchEnd <= oend_w);
      assert(32 >= 16);
      ZSTD_copy16(op, *litPtr);
-     if (sequence.litLength > 16)
+     if (sequence_litLength > 16)
      {
-         ZSTD_wildcopy(op + 16, *litPtr + 16, (nint)(sequence.litLength - 16), ZSTD_overlap_e.ZSTD_no_overlap);
+         ZSTD_wildcopy(op + 16, *litPtr + 16, (nint)(sequence_litLength - 16), ZSTD_overlap_e.ZSTD_no_overlap);
      }

      op = oLitEnd;
      *litPtr = iLitEnd;
-     if (sequence.offset > (nuint)(oLitEnd - prefixStart))
+     if (sequence_offset > (nuint)(oLitEnd - prefixStart))
      {
-         if (sequence.offset > (nuint)(oLitEnd - virtualStart))
+         if (sequence_offset > (nuint)(oLitEnd - virtualStart))
          {
              return unchecked((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_corruption_detected));
          }

          match = dictEnd + (match - prefixStart);
-         if (match + sequence.matchLength <= dictEnd)
+         if (match + sequence_matchLength <= dictEnd)
          {
-             memmove(oLitEnd, match, sequence.matchLength);
+             memmove(oLitEnd, match, sequence_matchLength);
              return sequenceLength;
          }

          {
              nuint length1 = (nuint)(dictEnd - match);
              memmove(oLitEnd, match, length1);
              op = oLitEnd + length1;
-             sequence.matchLength -= length1;
+             sequence_matchLength -= length1;
              match = prefixStart;
          }
      }

      assert(op <= oMatchEnd);
      assert(oMatchEnd <= oend_w);
      assert(match >= prefixStart);
-     assert(sequence.matchLength >= 1);
-     if (sequence.offset >= 16)
+     assert(sequence_matchLength >= 1);
+     if (sequence_offset >= 16)
      {
-         ZSTD_wildcopy(op, match, (nint)sequence.matchLength, ZSTD_overlap_e.ZSTD_no_overlap);
+         ZSTD_wildcopy(op, match, (nint)sequence_matchLength, ZSTD_overlap_e.ZSTD_no_overlap);
          return sequenceLength;
      }

-     assert(sequence.offset < 16);
-     ZSTD_overlapCopy8(&op, &match, sequence.offset);
-     if (sequence.matchLength > 8)
+     assert(sequence_offset < 16);
+     ZSTD_overlapCopy8(ref op, ref match, sequence_offset);
+     if (sequence_matchLength > 8)
      {
          assert(op < oMatchEnd);
-         ZSTD_wildcopy(op, match, (nint)sequence.matchLength - 8, ZSTD_overlap_e.ZSTD_overlap_src_before_dst);
+         ZSTD_wildcopy(op, match, (nint)sequence_matchLength - 8, ZSTD_overlap_e.ZSTD_overlap_src_before_dst);
      }

      return sequenceLength;
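
The hunk above hoists `sequence.litLength`, `sequence.matchLength`, and `sequence.offset` into locals at the top of the method, and rebuilds a `seq_t` only on the cold `ZSTD_execSequenceEnd` path. A plausible motivation (the commit only shows the mechanics) is that RyuJIT does not always promote the fields of a struct parameter into registers, so repeated `sequence.*` reads in the hot path can turn into repeated stack loads, while plain scalar locals are easy to enregister. A minimal, self-contained sketch of the pattern, with invented names:

    using System;

    struct Seq { public nuint LitLength, MatchLength, Offset; }

    static class FieldHoistSketch
    {
        static nuint Consume(Seq s)
        {
            // Hoist each field into a plain local once, instead of reading
            // s.* repeatedly inside the hot code that follows.
            nuint lit = s.LitLength, match = s.MatchLength, off = s.Offset;
            nuint total = 0;
            for (int i = 0; i < 4; i++)
                total += lit + match + off;
            return total;
        }

        static void Main() =>
            Console.WriteLine(Consume(new Seq { LitLength = 1, MatchLength = 2, Offset = 3 })); // 24
    }
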
@@ -1319,6 +1322,8 @@ private static nuint ZSTD_decompressSequences_bodySplitLitBuffer(ZSTD_DCtx_s* dc
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  private static nuint ZSTD_decompressSequences_body(ZSTD_DCtx_s* dctx, void* dst, nuint maxDstSize, void* seqStart, nuint seqSize, int nbSeq, ZSTD_longOffset_e isLongOffset, int frame)
  {
+     // HACK, force nbSeq to stack (better register usage)
+     System.Threading.Thread.VolatileRead(ref nbSeq);
      byte* ip = (byte*)seqStart;
      byte* iend = ip + seqSize;
      byte* ostart = (byte*)dst;
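
The `Thread.VolatileRead(ref nbSeq)` call added above is not about memory ordering: passing `ref nbSeq` exposes the parameter's address, which makes the JIT give `nbSeq` a stack home rather than dedicating a register to it for the whole method — the "force nbSeq to stack" the comment describes. A hypothetical illustration of the same trick (names invented; the effect depends on the JIT and target):

    using System;
    using System.Threading;

    static class StackSpillSketch
    {
        static long SumEvery(int[] data, int stride)
        {
            // Address taken: the JIT now treats stride as stack-resident,
            // freeing a register for the hotter loop values below.
            Thread.VolatileRead(ref stride);
            long sum = 0;
            for (int i = 0; i < data.Length; i += stride)
                sum += data[i];
            return sum;
        }

        static void Main() =>
            Console.WriteLine(SumEvery(new[] { 1, 2, 3, 4, 5, 6 }, 2)); // 1 + 3 + 5 = 9
    }
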
@@ -1352,7 +1357,88 @@ private static nuint ZSTD_decompressSequences_body(ZSTD_DCtx_s* dctx, void* dst,
  for (;;)
  {
      seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
-     nuint oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
+     nuint oneSeqSize;
+     {
+         var sequence_litLength = sequence.litLength;
+         var sequence_matchLength = sequence.matchLength;
+         var sequence_offset = sequence.offset;
+         byte* oLitEnd = op + sequence_litLength;
+         oneSeqSize = sequence_litLength + sequence_matchLength;
+         /* risk : address space overflow (32-bits) */
+         byte* oMatchEnd = op + oneSeqSize;
+         /* risk : address space underflow on oend=NULL */
+         byte* oend_w = oend - 32;
+         byte* iLitEnd = litPtr + sequence_litLength;
+         byte* match = oLitEnd - sequence_offset;
+         assert(op != null);
+         assert(oend_w < oend);
+         if (iLitEnd > litEnd || oMatchEnd > oend_w || MEM_32bits && (nuint)(oend - op) < oneSeqSize + 32)
+         {
+             oneSeqSize = ZSTD_execSequenceEnd(op, oend, new seq_t { litLength = sequence_litLength, matchLength = sequence_matchLength, offset = sequence_offset }, &litPtr, litEnd, prefixStart, vBase, dictEnd);
+             goto returnOneSeqSize;
+         }
+
+         assert(op <= oLitEnd);
+         assert(oLitEnd < oMatchEnd);
+         assert(oMatchEnd <= oend);
+         assert(iLitEnd <= litEnd);
+         assert(oLitEnd <= oend_w);
+         assert(oMatchEnd <= oend_w);
+         assert(32 >= 16);
+         ZSTD_copy16(op, litPtr);
+         if (sequence_litLength > 16)
+         {
+             ZSTD_wildcopy(op + 16, litPtr + 16, (nint)(sequence_litLength - 16), ZSTD_overlap_e.ZSTD_no_overlap);
+         }
+
+         byte* opInner = oLitEnd;
+         litPtr = iLitEnd;
+         if (sequence_offset > (nuint)(oLitEnd - prefixStart))
+         {
+             if (sequence_offset > (nuint)(oLitEnd - vBase))
+             {
+                 oneSeqSize = unchecked((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_corruption_detected));
+                 goto returnOneSeqSize;
+             }
+
+             match = dictEnd + (match - prefixStart);
+             if (match + sequence_matchLength <= dictEnd)
+             {
+                 memmove(oLitEnd, match, sequence_matchLength);
+                 goto returnOneSeqSize;
+             }
+
+             {
+                 nuint length1 = (nuint)(dictEnd - match);
+                 memmove(oLitEnd, match, length1);
+                 opInner = oLitEnd + length1;
+                 sequence_matchLength -= length1;
+                 match = prefixStart;
+             }
+         }
+
+         assert(opInner <= oMatchEnd);
+         assert(oMatchEnd <= oend_w);
+         assert(match >= prefixStart);
+         assert(sequence_matchLength >= 1);
+         if (sequence_offset >= 16)
+         {
+             ZSTD_wildcopy(opInner, match, (nint)sequence_matchLength, ZSTD_overlap_e.ZSTD_no_overlap);
+             goto returnOneSeqSize;
+         }
+
+         assert(sequence_offset < 16);
+         ZSTD_overlapCopy8(ref opInner, ref match, sequence_offset);
+         if (sequence_matchLength > 8)
+         {
+             assert(opInner < oMatchEnd);
+             ZSTD_wildcopy(opInner, match, (nint)sequence_matchLength - 8, ZSTD_overlap_e.ZSTD_overlap_src_before_dst);
+         }
+
+         returnOneSeqSize:
+         ;
+     }
+
      if (ERR_isError(oneSeqSize))
          return oneSeqSize;
      op += oneSeqSize;
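
This hunk hand-inlines the body of `ZSTD_execSequence` (left unchanged above) into the decode loop. Since an inlined block cannot `return`, each early `return x;` of the helper becomes `oneSeqSize = x; goto returnOneSeqSize;`, and the helper's advancing of `op` is shadowed by `opInner` so the loop's own `op` moves only once, via the shared `op += oneSeqSize;` that follows. A self-contained sketch of that return-to-goto rewrite (illustrative names only):

    using System;

    static class InlineWithGotoSketch
    {
        // Stand-in for a helper with early returns that was inlined by hand.
        static int ClampedDouble(int v, int limit)
        {
            int result;
            {
                if (v > limit)      // was: return limit;
                {
                    result = limit;
                    goto done;
                }
                result = v * 2;     // was: return v * 2;
                done: ;
            }
            return result;          // single exit after the inlined block
        }

        static void Main() => Console.WriteLine(ClampedDouble(9, 8)); // 9 > 8, prints 8
    }
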
@@ -1797,5 +1883,37 @@ public static nuint ZSTD_decompressBlock(ZSTD_DCtx_s* dctx, void* dst, nuint dst
      dctx->previousDstEnd = (sbyte*)dst + dSize;
      return dSize;
  }
+
+ /*! ZSTD_overlapCopy8() :
+  *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
+  *  If the offset is < 8 then the offset is spread to at least 8 bytes.
+  *
+  *  Precondition: *ip <= *op
+  *  Postcondition: *op - *ip >= 8
+  */
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void ZSTD_overlapCopy8(ref byte* op, ref byte* ip, nuint offset)
+ {
+     assert(ip <= op);
+     if (offset < 8)
+     {
+         int sub2 = dec64table[offset];
+         op[0] = ip[0];
+         op[1] = ip[1];
+         op[2] = ip[2];
+         op[3] = ip[3];
+         ip += dec32table[offset];
+         ZSTD_copy4(op + 4, ip);
+         ip -= sub2;
+     }
+     else
+     {
+         ZSTD_copy8(op, ip);
+     }
+
+     ip += 8;
+     op += 8;
+     assert(op - ip >= 8);
+ }
  }
  }
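
For offsets below 8, the two lookup tables (in the zstd sources: `dec32table = { 0, 1, 2, 1, 4, 4, 4, 4 }`, added to `ip`, and `dec64table = { 8, 8, 8, 7, 8, 9, 10, 11 }`, subtracted) re-point `ip` between the two 4-byte copies so the repeating byte pattern stays correct, and once both pointers advance by 8 the gap `op - ip` is at least 8: the rest of the match can then be copied in 8-byte chunks without the destination overrunning unread source bytes. A standalone sketch of the offset = 1 case (not repository code; compile with unsafe blocks enabled), where a single seed byte is spread into a run of 8:

    using System;

    unsafe class OverlapCopySketch
    {
        static readonly uint[] Dec32 = { 0, 1, 2, 1, 4, 4, 4, 4 };  // added to ip
        static readonly int[] Dec64 = { 8, 8, 8, 7, 8, 9, 10, 11 }; // subtracted from ip

        static void OverlapCopy8(ref byte* op, ref byte* ip, nuint offset)
        {
            if (offset < 8)
            {
                int sub2 = Dec64[offset];
                op[0] = ip[0]; op[1] = ip[1]; op[2] = ip[2]; op[3] = ip[3];
                ip += Dec32[offset];
                for (int i = 0; i < 4; i++) op[4 + i] = ip[i]; // the ZSTD_copy4 step
                ip -= sub2;
            }
            else
            {
                for (int i = 0; i < 8; i++) op[i] = ip[i];     // the ZSTD_copy8 step
            }

            ip += 8;
            op += 8;
        }

        static void Main()
        {
            byte[] buf = new byte[16];
            buf[0] = 0xAB;                  // seed byte of an RLE-style match
            fixed (byte* basePtr = buf)
            {
                byte* op = basePtr + 1;     // destination
                byte* ip = basePtr;         // source: offset = 1
                OverlapCopy8(ref op, ref ip, 1);
                Console.WriteLine(op - ip); // 8 -> safe for 8-byte chunk copies now
                Console.WriteLine(buf[8]);  // 171 (0xAB): the byte was spread
            }
        }
    }
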