Skip to content

Commit 1921c1b

Browse files
committed
Optimized MimeParser to use Span<T>.IndexOf() on .NET Core
Like the previous MimeReader optimization patch, this gets us a 20-30% performance improvement when parsing messages from a MemoryStream. The improvement is probably closer to 5-10% for FileStreams, since file I/O is such a huge factor in overall MimeParser speed. (See the diff of Benchmarks.md for the actual numbers.)
1 parent c3bef3e commit 1921c1b

File tree

4 files changed

+93
-85
lines changed

4 files changed

+93
-85
lines changed

Benchmarks/Benchmarks-net48.md

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,26 +17,26 @@ Intel Core i7-9700 CPU 3.00GHz, 1 CPU, 8 logical and 8 physical cores
1717

1818
### MimeParser
1919

20-
| Method | Mean | Error | StdDev |
21-
|--------------------------------------------------- |------------:|----------:|----------:|
22-
| MimeParser_StarTrekMessage | 129.88 us | 2.487 us | 2.765 us |
23-
| MimeParser_StarTrekMessagePersistent | 114.42 us | 2.191 us | 2.050 us |
24-
| MimeParser_ContentLengthMbox | 1,094.80 us | 6.510 us | 5.083 us |
25-
| MimeParser_ContentLengthMboxPersistent | 1,016.36 us | 15.169 us | 14.189 us |
26-
| MimeParser_JwzMbox | 8,887.55 us | 52.108 us | 43.513 us |
27-
| MimeParser_JwzMboxPersistent | 7,958.80 us | 81.472 us | 76.209 us |
28-
| MimeParser_HeaderStressTest | 31.98 us | 0.233 us | 0.207 us |
29-
| ExperimentalMimeParser_StarTrekMessage | 120.56 us | 1.018 us | 0.903 us |
30-
| ExperimentalMimeParser_StarTrekMessagePersistent | 106.50 us | 0.631 us | 0.527 us |
31-
| ExperimentalMimeParser_ContentLengthMbox | 1,054.48 us | 5.349 us | 4.466 us |
32-
| ExperimentalMimeParser_ContentLengthMboxPersistent | 957.46 us | 5.222 us | 4.630 us |
33-
| ExperimentalMimeParser_JwzMbox | 8,555.51 us | 80.363 us | 71.239 us |
34-
| ExperimentalMimeParser_JwzMboxPersistent | 7,504.97 us | 60.622 us | 53.740 us |
35-
| ExperimentalMimeParser_HeaderStressTest | 24.93 us | 0.173 us | 0.144 us |
36-
| MimeReader_StarTrekMessage | 86.82 us | 1.015 us | 0.900 us |
37-
| MimeReader_ContentLengthMbox | 691.30 us | 6.961 us | 6.171 us |
38-
| MimeReader_JwzMbox | 5,918.21 us | 61.377 us | 51.252 us |
39-
| MimeReader_HeaderStressTest | 15.62 us | 0.081 us | 0.063 us |
20+
| Method | Mean | Error | StdDev |
21+
|--------------------------------------------------- |------------:|-----------:|-----------:|
22+
| MimeParser_StarTrekMessage | 135.64 us | 1.390 us | 1.161 us |
23+
| MimeParser_StarTrekMessagePersistent | 123.61 us | 1.596 us | 1.492 us |
24+
| MimeParser_ContentLengthMbox | 1,130.15 us | 18.279 us | 35.651 us |
25+
| MimeParser_ContentLengthMboxPersistent | 1,018.27 us | 12.764 us | 10.658 us |
26+
| MimeParser_JwzMbox | 8,771.53 us | 44.376 us | 39.338 us |
27+
| MimeParser_JwzMboxPersistent | 7,974.29 us | 62.088 us | 55.040 us |
28+
| MimeParser_HeaderStressTest | 30.62 us | 0.155 us | 0.145 us |
29+
| ExperimentalMimeParser_StarTrekMessage | 120.82 us | 2.290 us | 2.249 us |
30+
| ExperimentalMimeParser_StarTrekMessagePersistent | 104.48 us | 0.664 us | 0.622 us |
31+
| ExperimentalMimeParser_ContentLengthMbox | 1,036.78 us | 14.060 us | 12.464 us |
32+
| ExperimentalMimeParser_ContentLengthMboxPersistent | 979.43 us | 5.135 us | 4.552 us |
33+
| ExperimentalMimeParser_JwzMbox | 8,508.28 us | 58.749 us | 49.058 us |
34+
| ExperimentalMimeParser_JwzMboxPersistent | 7,491.65 us | 130.215 us | 121.803 us |
35+
| ExperimentalMimeParser_HeaderStressTest | 24.53 us | 0.200 us | 0.167 us |
36+
| MimeReader_StarTrekMessage | 85.77 us | 1.237 us | 1.033 us |
37+
| MimeReader_ContentLengthMbox | 679.32 us | 4.484 us | 3.975 us |
38+
| MimeReader_JwzMbox | 5,882.19 us | 89.402 us | 83.627 us |
39+
| MimeReader_HeaderStressTest | 15.70 us | 0.293 us | 0.314 us |
4040

4141
### BestEncodingFilter
4242

Benchmarks/Benchmarks.md

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,26 +18,27 @@ Intel Core i7-9700 CPU 3.00GHz, 1 CPU, 8 logical and 8 physical cores
1818

1919
### MimeParser
2020

21-
| Method | Mean | Error | StdDev |
22-
|--------------------------------------------------- |-------------:|-----------:|-----------:|
23-
| MimeParser_StarTrekMessage | 90.435 us | 0.7455 us | 0.6973 us |
24-
| MimeParser_StarTrekMessagePersistent | 81.716 us | 0.5504 us | 0.4596 us |
25-
| MimeParser_ContentLengthMbox | 773.167 us | 9.6001 us | 8.0165 us |
26-
| MimeParser_ContentLengthMboxPersistent | 712.220 us | 5.6727 us | 4.7370 us |
27-
| MimeParser_JwzMbox | 6,837.832 us | 67.6953 us | 60.0101 us |
28-
| MimeParser_JwzMboxPersistent | 6,115.685 us | 31.6801 us | 24.7338 us |
29-
| MimeParser_HeaderStressTest | 19.415 us | 0.2653 us | 0.2352 us |
30-
| ExperimentalMimeParser_StarTrekMessage | 69.727 us | 0.5362 us | 0.4753 us |
31-
| ExperimentalMimeParser_StarTrekMessagePersistent | 58.905 us | 0.6591 us | 0.5503 us |
32-
| ExperimentalMimeParser_ContentLengthMbox | 608.324 us | 8.1063 us | 7.5826 us |
33-
| ExperimentalMimeParser_ContentLengthMboxPersistent | 524.317 us | 7.7340 us | 6.4582 us |
34-
| ExperimentalMimeParser_JwzMbox | 5,265.384 us | 56.6687 us | 50.2353 us |
35-
| ExperimentalMimeParser_JwzMboxPersistent | 4,474.884 us | 89.2177 us | 87.6238 us |
36-
| ExperimentalMimeParser_HeaderStressTest | 11.653 us | 0.1319 us | 0.1169 us |
37-
| MimeReader_StarTrekMessage | 48.659 us | 0.2957 us | 0.2469 us |
38-
| MimeReader_ContentLengthMbox | 353.706 us | 6.8101 us | 6.3702 us |
39-
| MimeReader_JwzMbox | 3,398.939 us | 32.1085 us | 28.4634 us |
40-
| MimeReader_HeaderStressTest | 6.625 us | 0.0616 us | 0.0481 us |
21+
| Method | Mean | Error | StdDev |
22+
|--------------------------------------------------- |-------------:|-----------:|------------:|
23+
| MimeParser_StarTrekMessage | 71.731 us | 0.3290 us | 0.2747 us |
24+
| MimeParser_StarTrekMessagePersistent | 60.809 us | 0.8136 us | 0.7213 us |
25+
| MimeParser_ContentLengthMbox | 621.492 us | 3.7083 us | 2.8952 us |
26+
| MimeParser_ContentLengthMboxPersistent | 551.683 us | 10.7364 us | 10.5446 us |
27+
| MimeParser_JwzMbox | 5,238.200 us | 67.6061 us | 63.2388 us |
28+
| MimeParser_JwzMboxPersistent | 4,612.795 us | 91.5963 us | 119.1010 us |
29+
| MimeParser_HeaderStressTest | 15.356 us | 0.0761 us | 0.0635 us |
30+
31+
| ExperimentalMimeParser_StarTrekMessage | 70.105 us | 1.1094 us | 0.9834 us |
32+
| ExperimentalMimeParser_StarTrekMessagePersistent | 57.515 us | 0.6193 us | 0.5490 us |
33+
| ExperimentalMimeParser_ContentLengthMbox | 622.374 us | 11.9024 us | 14.6173 us |
34+
| ExperimentalMimeParser_ContentLengthMboxPersistent | 534.965 us | 4.6371 us | 4.1107 us |
35+
| ExperimentalMimeParser_JwzMbox | 5,030.161 us | 63.8917 us | 56.6384 us |
36+
| ExperimentalMimeParser_JwzMboxPersistent | 4,557.651 us | 40.6831 us | 33.9722 us |
37+
| ExperimentalMimeParser_HeaderStressTest | 12.365 us | 0.2359 us | 0.2091 us |
38+
| MimeReader_StarTrekMessage | 47.750 us | 0.2926 us | 0.2444 us |
39+
| MimeReader_ContentLengthMbox | 353.960 us | 2.9889 us | 2.7958 us |
40+
| MimeReader_JwzMbox | 3,209.473 us | 26.7118 us | 22.3056 us |
41+
| MimeReader_HeaderStressTest | 6.901 us | 0.1346 us | 0.1259 us |
4142

4243
### BestEncodingFilter
4344

MimeKit/AsyncMimeParser.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -301,8 +301,7 @@ async Task ConstructMessagePartAsync (MessagePart rfc822, MimeEntityEndEventArgs
301301

302302
*inend = (byte) '\n';
303303

304-
while (*inptr != (byte) '\n')
305-
inptr++;
304+
inptr = EndOfLine (inptr, inend + 1);
306305

307306
// Note: This isn't obvious, but if the "boundary" that was found is an Mbox "From " line, then
308307
// either the current stream offset is >= contentEnd -or- RespectContentLength is false. It will

MimeKit/MimeParser.cs

Lines changed: 51 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,52 @@ static unsafe bool CStringsEqual (byte* str1, byte* str2, int length)
725725
return true;
726726
}
727727

728+
[MethodImpl (MethodImplOptions.AggressiveInlining)]
729+
static unsafe byte* EndOfLine (byte* inptr, byte* inend)
730+
{
731+
#if NETCOREAPP
732+
var span = new ReadOnlySpan<byte> (inptr, (int) (inend - inptr));
733+
734+
return inptr += span.IndexOf ((byte) '\n');
735+
#else
736+
// scan for a linefeed character until we are 4-byte aligned.
737+
switch (((long) inptr) & 0x03) {
738+
case 1:
739+
if (*inptr == (byte) '\n')
740+
break;
741+
inptr++;
742+
goto case 2;
743+
case 2:
744+
if (*inptr == (byte) '\n')
745+
break;
746+
inptr++;
747+
goto case 3;
748+
case 3:
749+
if (*inptr != (byte) '\n')
750+
inptr++;
751+
break;
752+
}
753+
754+
if (*inptr != (byte) '\n') {
755+
// -funroll-loops, yippee ki-yay.
756+
do {
757+
uint mask = *((uint*) inptr) ^ 0x0A0A0A0A;
758+
mask = ((mask - 0x01010101) & (~mask & 0x80808080));
759+
760+
if (mask != 0)
761+
break;
762+
763+
inptr += 4;
764+
} while (true);
765+
766+
while (*inptr != (byte) '\n')
767+
inptr++;
768+
}
769+
770+
return inptr;
771+
#endif
772+
}
773+
728774
unsafe void StepByteOrderMark (byte* inbuf, ref int bomIndex)
729775
{
730776
byte* inptr = inbuf + inputIndex;
@@ -787,8 +833,7 @@ unsafe bool StepMboxMarker (byte* inbuf, ref int left)
787833
byte* start = inptr;
788834

789835
// scan for the end of the line
790-
while (*inptr != (byte) '\n')
791-
inptr++;
836+
inptr = EndOfLine (inptr, inend + 1);
792837

793838
if (inptr == inend) {
794839
// we don't have enough input data
@@ -994,8 +1039,7 @@ unsafe bool StepHeaders (byte* inbuf, ref bool scanningFieldName, ref bool check
9941039

9951040
scanningFieldName = false;
9961041

997-
while (*inptr != (byte) '\n')
998-
inptr++;
1042+
inptr = EndOfLine (inptr, inend + 1);
9991043

10001044
if (inptr == inend) {
10011045
// we didn't manage to slurp up a full line, save what we have and refill our input buffer
@@ -1133,8 +1177,7 @@ unsafe bool InnerSkipLine (byte* inbuf, bool consumeNewLine)
11331177

11341178
*inend = (byte) '\n';
11351179

1136-
while (*inptr != (byte) '\n')
1137-
inptr++;
1180+
inptr = EndOfLine (inptr, inend + 1);
11381181

11391182
if (inptr < inend) {
11401183
inputIndex = (int) (inptr - inbuf);
@@ -1357,41 +1400,7 @@ unsafe bool ScanContent (byte* inbuf, ref bool midline, ref bool[] formats)
13571400
while (inptr < inend) {
13581401
byte* start = inptr;
13591402

1360-
// Note: we can always depend on byte[] arrays being 4-byte aligned on 32bit and 64bit architectures
1361-
// so we can safely use the startIndex instead of `((long) inptr) & 3` to determine the alignment.
1362-
switch (startIndex & 3) {
1363-
case 1:
1364-
if (*inptr == (byte) '\n')
1365-
break;
1366-
inptr++;
1367-
goto case 2;
1368-
case 2:
1369-
if (*inptr == (byte) '\n')
1370-
break;
1371-
inptr++;
1372-
goto case 3;
1373-
case 3:
1374-
if (*inptr != (byte) '\n')
1375-
inptr++;
1376-
break;
1377-
}
1378-
1379-
if (*inptr != (byte) '\n') {
1380-
// -funroll-loops, yippee ki-yay.
1381-
do {
1382-
uint mask = *((uint*) inptr) ^ 0x0A0A0A0A;
1383-
mask = ((mask - 0x01010101) & (~mask & 0x80808080));
1384-
1385-
if (mask != 0)
1386-
break;
1387-
1388-
inptr += 4;
1389-
} while (true);
1390-
1391-
while (*inptr != (byte) '\n')
1392-
inptr++;
1393-
}
1394-
1403+
inptr = EndOfLine (inptr, inend + 1);
13951404
length = (int) (inptr - start);
13961405

13971406
if (inptr < inend) {
@@ -1560,8 +1569,7 @@ unsafe void ConstructMessagePart (MessagePart rfc822, MimeEntityEndEventArgs arg
15601569

15611570
*inend = (byte) '\n';
15621571

1563-
while (*inptr != (byte) '\n')
1564-
inptr++;
1572+
inptr = EndOfLine (inptr, inend + 1);
15651573

15661574
// Note: This isn't obvious, but if the "boundary" that was found is an Mbox "From " line, then
15671575
// either the current stream offset is >= contentEnd -or- RespectContentLength is false. It will

0 commit comments

Comments
 (0)