Skip to content

Commit 04cf9d7

Browse files
committed
Optimized MimeReader to use Span<T>.IndexOf() on .NET Core
This gives us a 20-30% performance improvement when parsing messages from a MemoryStream. For FileStreams, the improvement is probably closer to 5-10%, since file I/O is such a huge factor in overall MimeReader speed. (See the Benchmarks.md diff for the actual numbers.)
1 parent b933635 commit 04cf9d7

File tree

4 files changed

+95
-97
lines changed

4 files changed

+95
-97
lines changed

Benchmarks/Benchmarks-net48.md

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,26 +17,26 @@ Intel Core i7-9700 CPU 3.00GHz, 1 CPU, 8 logical and 8 physical cores
1717

1818
### MimeParser
1919

20-
| Method | Mean | Error | StdDev |
21-
|--------------------------------------------------- |------------:|-----------:|-----------:|
22-
| MimeParser_StarTrekMessage | 123.34 us | 2.215 us | 2.071 us |
23-
| MimeParser_StarTrekMessagePersistent | 105.51 us | 0.962 us | 0.853 us |
24-
| MimeParser_ContentLengthMbox | 1,093.34 us | 15.356 us | 14.364 us |
25-
| MimeParser_ContentLengthMboxPersistent | 1,001.96 us | 7.074 us | 5.907 us |
26-
| MimeParser_JwzMbox | 8,900.18 us | 74.639 us | 69.817 us |
27-
| MimeParser_JwzMboxPersistent | 7,991.92 us | 107.522 us | 95.315 us |
28-
| MimeParser_HeaderStressTest | 31.96 us | 0.312 us | 0.277 us |
29-
| ExperimentalMimeParser_StarTrekMessage | 109.28 us | 0.570 us | 0.533 us |
30-
| ExperimentalMimeParser_StarTrekMessagePersistent | 107.95 us | 1.304 us | 1.156 us |
31-
| ExperimentalMimeParser_ContentLengthMbox | 1,019.40 us | 4.562 us | 3.809 us |
32-
| ExperimentalMimeParser_ContentLengthMboxPersistent | 988.76 us | 8.919 us | 7.906 us |
33-
| ExperimentalMimeParser_JwzMbox | 8,323.95 us | 59.375 us | 52.634 us |
34-
| ExperimentalMimeParser_JwzMboxPersistent | 7,830.84 us | 69.701 us | 58.203 us |
35-
| ExperimentalMimeParser_HeaderStressTest | 25.08 us | 0.235 us | 0.208 us |
36-
| MimeReader_StarTrekMessage | 81.76 us | 0.706 us | 0.626 us |
37-
| MimeReader_ContentLengthMbox | 707.73 us | 4.011 us | 3.349 us |
38-
| MimeReader_JwzMbox | 6,340.44 us | 121.161 us | 118.996 us |
39-
| MimeReader_HeaderStressTest | 15.78 us | 0.062 us | 0.055 us |
20+
| Method | Mean | Error | StdDev |
21+
|--------------------------------------------------- |------------:|----------:|----------:|
22+
| MimeParser_StarTrekMessage | 129.88 us | 2.487 us | 2.765 us |
23+
| MimeParser_StarTrekMessagePersistent | 114.42 us | 2.191 us | 2.050 us |
24+
| MimeParser_ContentLengthMbox | 1,094.80 us | 6.510 us | 5.083 us |
25+
| MimeParser_ContentLengthMboxPersistent | 1,016.36 us | 15.169 us | 14.189 us |
26+
| MimeParser_JwzMbox | 8,887.55 us | 52.108 us | 43.513 us |
27+
| MimeParser_JwzMboxPersistent | 7,958.80 us | 81.472 us | 76.209 us |
28+
| MimeParser_HeaderStressTest | 31.98 us | 0.233 us | 0.207 us |
29+
| ExperimentalMimeParser_StarTrekMessage | 120.56 us | 1.018 us | 0.903 us |
30+
| ExperimentalMimeParser_StarTrekMessagePersistent | 106.50 us | 0.631 us | 0.527 us |
31+
| ExperimentalMimeParser_ContentLengthMbox | 1,054.48 us | 5.349 us | 4.466 us |
32+
| ExperimentalMimeParser_ContentLengthMboxPersistent | 957.46 us | 5.222 us | 4.630 us |
33+
| ExperimentalMimeParser_JwzMbox | 8,555.51 us | 80.363 us | 71.239 us |
34+
| ExperimentalMimeParser_JwzMboxPersistent | 7,504.97 us | 60.622 us | 53.740 us |
35+
| ExperimentalMimeParser_HeaderStressTest | 24.93 us | 0.173 us | 0.144 us |
36+
| MimeReader_StarTrekMessage | 86.82 us | 1.015 us | 0.900 us |
37+
| MimeReader_ContentLengthMbox | 691.30 us | 6.961 us | 6.171 us |
38+
| MimeReader_JwzMbox | 5,918.21 us | 61.377 us | 51.252 us |
39+
| MimeReader_HeaderStressTest | 15.62 us | 0.081 us | 0.063 us |
4040

4141
### BestEncodingFilter
4242

Benchmarks/Benchmarks.md

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,26 +18,26 @@ Intel Core i7-9700 CPU 3.00GHz, 1 CPU, 8 logical and 8 physical cores
1818

1919
### MimeParser
2020

21-
| Method | Mean | Error | StdDev |
22-
|--------------------------------------------------- |-------------:|------------:|------------:|
23-
| MimeParser_StarTrekMessage | 107.756 us | 2.1442 us | 5.9058 us |
24-
| MimeParser_StarTrekMessagePersistent | 83.645 us | 0.7263 us | 0.6439 us |
25-
| MimeParser_ContentLengthMbox | 797.602 us | 7.2364 us | 6.4148 us |
26-
| MimeParser_ContentLengthMboxPersistent | 715.624 us | 5.7660 us | 5.1115 us |
27-
| MimeParser_JwzMbox | 6,917.998 us | 70.9975 us | 66.4111 us |
28-
| MimeParser_JwzMboxPersistent | 6,196.110 us | 123.8234 us | 161.0054 us |
29-
| MimeParser_HeaderStressTest | 20.632 us | 0.2864 us | 0.2539 us |
30-
| ExperimentalMimeParser_StarTrekMessage | 99.692 us | 0.5002 us | 0.4177 us |
31-
| ExperimentalMimeParser_StarTrekMessagePersistent | 90.032 us | 1.4249 us | 1.3329 us |
32-
| ExperimentalMimeParser_ContentLengthMbox | 787.329 us | 8.0334 us | 7.1214 us |
33-
| ExperimentalMimeParser_ContentLengthMboxPersistent | 688.084 us | 5.1357 us | 4.5527 us |
34-
| ExperimentalMimeParser_JwzMbox | 6,907.650 us | 69.6696 us | 65.1690 us |
35-
| ExperimentalMimeParser_JwzMboxPersistent | 6,434.393 us | 126.1687 us | 192.6729 us |
36-
| ExperimentalMimeParser_HeaderStressTest | 13.071 us | 0.2190 us | 0.1941 us |
37-
| MimeReader_StarTrekMessage | 79.039 us | 0.6652 us | 0.5193 us |
38-
| MimeReader_ContentLengthMbox | 506.600 us | 2.4274 us | 2.0270 us |
39-
| MimeReader_JwzMbox | 4,986.448 us | 30.5282 us | 27.0625 us |
40-
| MimeReader_HeaderStressTest | 8.118 us | 0.0573 us | 0.0478 us |
21+
| Method | Mean | Error | StdDev |
22+
|--------------------------------------------------- |-------------:|-----------:|-----------:|
23+
| MimeParser_StarTrekMessage | 90.435 us | 0.7455 us | 0.6973 us |
24+
| MimeParser_StarTrekMessagePersistent | 81.716 us | 0.5504 us | 0.4596 us |
25+
| MimeParser_ContentLengthMbox | 773.167 us | 9.6001 us | 8.0165 us |
26+
| MimeParser_ContentLengthMboxPersistent | 712.220 us | 5.6727 us | 4.7370 us |
27+
| MimeParser_JwzMbox | 6,837.832 us | 67.6953 us | 60.0101 us |
28+
| MimeParser_JwzMboxPersistent | 6,115.685 us | 31.6801 us | 24.7338 us |
29+
| MimeParser_HeaderStressTest | 19.415 us | 0.2653 us | 0.2352 us |
30+
| ExperimentalMimeParser_StarTrekMessage | 69.727 us | 0.5362 us | 0.4753 us |
31+
| ExperimentalMimeParser_StarTrekMessagePersistent | 58.905 us | 0.6591 us | 0.5503 us |
32+
| ExperimentalMimeParser_ContentLengthMbox | 608.324 us | 8.1063 us | 7.5826 us |
33+
| ExperimentalMimeParser_ContentLengthMboxPersistent | 524.317 us | 7.7340 us | 6.4582 us |
34+
| ExperimentalMimeParser_JwzMbox | 5,265.384 us | 56.6687 us | 50.2353 us |
35+
| ExperimentalMimeParser_JwzMboxPersistent | 4,474.884 us | 89.2177 us | 87.6238 us |
36+
| ExperimentalMimeParser_HeaderStressTest | 11.653 us | 0.1319 us | 0.1169 us |
37+
| MimeReader_StarTrekMessage | 48.659 us | 0.2957 us | 0.2469 us |
38+
| MimeReader_ContentLengthMbox | 353.706 us | 6.8101 us | 6.3702 us |
39+
| MimeReader_JwzMbox | 3,398.939 us | 32.1085 us | 28.4634 us |
40+
| MimeReader_HeaderStressTest | 6.625 us | 0.0616 us | 0.0481 us |
4141

4242
### BestEncodingFilter
4343

MimeKit/AsyncMimeReader.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -455,8 +455,7 @@ async Task<int> ConstructMessagePartAsync (int depth, CancellationToken cancella
455455

456456
*inend = (byte) '\n';
457457

458-
while (*inptr != (byte) '\n')
459-
inptr++;
458+
inptr = EndOfLine (inptr, inend + 1);
460459

461460
// Note: This isn't obvious, but if the "boundary" that was found is an Mbox "From " line, then
462461
// either the current stream offset is >= contentEnd -or- RespectContentLength is false. It will

MimeKit/MimeReader.cs

Lines changed: 54 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1459,6 +1459,52 @@ static unsafe bool IsMboxMarker (byte[] text, bool allowMunged = false)
14591459
}
14601460
}
14611461

1462+
/// <summary>
/// Scans forward from <paramref name="inptr"/> and returns a pointer to the first
/// linefeed ('\n') byte found.
/// </summary>
/// <remarks>
/// A linefeed must exist in the scanned range. The call sites visible in this diff
/// write a '\n' sentinel at *inend and pass inend + 1 as the end pointer.
/// </remarks>
/// <param name="inptr">Pointer to the first byte to examine.</param>
/// <param name="inend">End of the range to search (exclusive); only consulted on the NETCOREAPP path.</param>
/// <returns>A pointer to the first '\n' byte at or after <paramref name="inptr"/>.</returns>
[MethodImpl (MethodImplOptions.AggressiveInlining)]
1463+
static unsafe byte* EndOfLine (byte* inptr, byte* inend)
1464+
{
1465+
#if NETCOREAPP
1466+
// Wrap the byte range [inptr, inend) in a span so the search can use Span<T>.IndexOf().
var span = new ReadOnlySpan<byte> (inptr, (int) (inend - inptr));
1467+
1468+
// Note: IndexOf() returns -1 when there is no match, which would step inptr back by one byte;
// this is why a '\n' must be present in the range.
return inptr += span.IndexOf ((byte) '\n');
1469+
#else
1470+
// scan for a linefeed character until we are 4-byte aligned.
// (an address that is already a multiple of 4 matches no case and falls straight through;
// inend is not consulted anywhere on this code path.)
1471+
switch (((long) inptr) & 0x03) {
1472+
case 1:
1473+
if (*inptr == (byte) '\n')
1474+
break;
1475+
inptr++;
1476+
goto case 2;
1477+
case 2:
1478+
if (*inptr == (byte) '\n')
1479+
break;
1480+
inptr++;
1481+
goto case 3;
1482+
case 3:
1483+
if (*inptr != (byte) '\n')
1484+
inptr++;
1485+
break;
1486+
}
1487+
1488+
if (*inptr != (byte) '\n') {
1489+
// -funroll-loops, yippee ki-yay.
// Examine 4 bytes per iteration instead of 1.
// NOTE(review): the 4-byte read can touch up to 3 bytes beyond the linefeed -- presumably
// the input buffer is padded to allow this; confirm against the buffer allocation.
1490+
do {
1491+
// XOR with 0x0A in every byte lane turns any '\n' byte into 0x00.
uint mask = *((uint*) inptr) ^ 0x0A0A0A0A;
1492+
// Zero-byte test: the result is non-zero if any of the 4 bytes is now 0x00.
mask = ((mask - 0x01010101) & (~mask & 0x80808080));
1493+
1494+
if (mask != 0)
1495+
break;
1496+
1497+
inptr += 4;
1498+
} while (true);
1499+
1500+
// A linefeed lies somewhere within the current 4-byte word; find the exact byte.
while (*inptr != (byte) '\n')
1501+
inptr++;
1502+
}
1503+
1504+
return inptr;
1505+
#endif
1506+
}
1507+
14621508
unsafe bool StepMboxMarkerStart (byte* inbuf, ref bool midline)
14631509
{
14641510
byte* inptr = inbuf + inputIndex;
@@ -1468,9 +1514,7 @@ unsafe bool StepMboxMarkerStart (byte* inbuf, ref bool midline)
14681514

14691515
if (midline) {
14701516
// we're in the middle of a line, so we need to scan for the end of the line
1471-
// TODO: unroll this loop?
1472-
while (*inptr != (byte) '\n')
1473-
inptr++;
1517+
inptr = EndOfLine (inptr, inend + 1);
14741518

14751519
if (inptr == inend) {
14761520
// we don't have enough input data
@@ -1492,9 +1536,7 @@ unsafe bool StepMboxMarkerStart (byte* inbuf, ref bool midline)
14921536
}
14931537

14941538
// scan for the end of the line
1495-
// TODO: unroll this loop?
1496-
while (*inptr != (byte) '\n')
1497-
inptr++;
1539+
inptr = EndOfLine (inptr, inend + 1);
14981540

14991541
if (inptr == inend) {
15001542
// we don't have enough data to check for a From line
@@ -1522,9 +1564,7 @@ unsafe bool StepMboxMarker (byte* inbuf, out int count)
15221564
*inend = (byte) '\n';
15231565

15241566
// scan for the end of the line
1525-
// TODO: unroll this loop?
1526-
while (*inptr != (byte) '\n')
1527-
inptr++;
1567+
inptr = EndOfLine (inptr, inend + 1);
15281568

15291569
count = (int) (inptr - start);
15301570

@@ -1711,9 +1751,7 @@ unsafe bool StepHeaderValue (byte* inbuf, ref bool midline)
17111751
*inend = (byte) '\n';
17121752

17131753
while (inptr < inend && (midline || IsBlank (*inptr))) {
1714-
// TODO: unroll this loop?
1715-
while (*inptr != (byte) '\n')
1716-
inptr++;
1754+
inptr = EndOfLine (inptr, inend + 1);
17171755

17181756
if (inptr == inend) {
17191757
// We've reached the end of the input buffer, and we are currently in the middle of a line.
@@ -1775,9 +1813,7 @@ unsafe bool TryCheckBoundaryWithinHeaderBlock (byte* inbuf)
17751813

17761814
*inend = (byte) '\n';
17771815

1778-
// TODO: unroll this loop?
1779-
while (*inptr != (byte) '\n')
1780-
inptr++;
1816+
inptr = EndOfLine (inptr, inend + 1);
17811817

17821818
if (inptr == inend)
17831819
return false;
@@ -2004,8 +2040,7 @@ unsafe bool SkipBoundaryMarkerInternal (byte* inbuf, bool endBoundary)
20042040
inptr += currentBoundary.Length;
20052041

20062042
// skip over any trailing whitespace
2007-
while (*inptr != (byte) '\n')
2008-
inptr++;
2043+
inptr = EndOfLine (inptr, inend + 1);
20092044

20102045
if (inptr < inend) {
20112046
inputIndex = (int) (inptr - inbuf);
@@ -2258,41 +2293,7 @@ unsafe bool ScanContent (byte* inbuf, ref bool midline, ref bool[] formats)
22582293
while (inptr < inend) {
22592294
byte* start = inptr;
22602295

2261-
// Note: we can always depend on byte[] arrays being 4-byte aligned on 32bit and 64bit architectures
2262-
// so we can safely use the startIndex instead of `((long) inptr) & 3` to determine the alignment.
2263-
switch (startIndex & 3) {
2264-
case 1:
2265-
if (*inptr == (byte) '\n')
2266-
break;
2267-
inptr++;
2268-
goto case 2;
2269-
case 2:
2270-
if (*inptr == (byte) '\n')
2271-
break;
2272-
inptr++;
2273-
goto case 3;
2274-
case 3:
2275-
if (*inptr != (byte) '\n')
2276-
inptr++;
2277-
break;
2278-
}
2279-
2280-
if (*inptr != (byte) '\n') {
2281-
// -funroll-loops, yippee ki-yay.
2282-
do {
2283-
uint mask = *((uint*) inptr) ^ 0x0A0A0A0A;
2284-
mask = ((mask - 0x01010101) & (~mask & 0x80808080));
2285-
2286-
if (mask != 0)
2287-
break;
2288-
2289-
inptr += 4;
2290-
} while (true);
2291-
2292-
while (*inptr != (byte) '\n')
2293-
inptr++;
2294-
}
2295-
2296+
inptr = EndOfLine (inptr, inend + 1);
22962297
length = (int) (inptr - start);
22972298

22982299
if (inptr < inend) {
@@ -2464,9 +2465,7 @@ unsafe int ConstructMessagePart (byte* inbuf, int depth, CancellationToken cance
24642465

24652466
*inend = (byte) '\n';
24662467

2467-
// TODO: unroll this loop?
2468-
while (*inptr != (byte) '\n')
2469-
inptr++;
2468+
inptr = EndOfLine (inptr, inend + 1);
24702469

24712470
// Note: This isn't obvious, but if the "boundary" that was found is an Mbox "From " line, then
24722471
// either the current stream offset is >= contentEnd -or- RespectContentLength is false. It will

0 commit comments

Comments
 (0)