Skip to content

Commit b84ebaa

Browse files
committed
Refactored MimeReader to use ReadOnlySpan<byte> instead of unsafe byte*
The main advantage of this change is to make the code a bit nicer. The AsyncMimeReader code, especially, becomes a lot nicer with this change. The other advantage is that Spans don't interfere with the GC like pinned memory buffers do, which could theoretically mean that using MimeReader is less likely to result in memory fragmentation (although not sure on the real-world consequences of the previous code in practice). The downside is that ReadOnlySpan<byte>.IndexOf() is *slower* than the current implementation on .NET Framework and possibly also even .NET Core where the platform architecture does not have support for SIMD. That's why this PR is a Work-In-Progress and not yet merged to master.
1 parent 04cf9d7 commit b84ebaa

File tree

2 files changed

+240
-402
lines changed

2 files changed

+240
-402
lines changed

MimeKit/AsyncMimeReader.cs

Lines changed: 18 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,7 @@ async Task<bool> StepByteOrderMarkAsync (CancellationToken cancellationToken)
6363
return false;
6464
}
6565

66-
unsafe {
67-
fixed (byte* inbuf = input) {
68-
complete = StepByteOrderMark (inbuf, ref bomIndex);
69-
}
70-
}
66+
complete = StepByteOrderMark (ref bomIndex);
7167
} while (!complete && inputIndex == inputEnd);
7268

7369
return complete;
@@ -89,11 +85,7 @@ async Task StepMboxMarkerAsync (CancellationToken cancellationToken)
8985
return;
9086
}
9187

92-
unsafe {
93-
fixed (byte* inbuf = input) {
94-
complete = StepMboxMarkerStart (inbuf, ref midline);
95-
}
96-
}
88+
complete = StepMboxMarkerStart (ref midline);
9789
} while (!complete);
9890

9991
var mboxMarkerOffset = GetOffset (inputIndex);
@@ -109,13 +101,8 @@ async Task StepMboxMarkerAsync (CancellationToken cancellationToken)
109101
}
110102

111103
int startIndex = inputIndex;
112-
int count;
113104

114-
unsafe {
115-
fixed (byte* inbuf = input) {
116-
complete = StepMboxMarker (inbuf, out count);
117-
}
118-
}
105+
complete = StepMboxMarker (out int count);
119106

120107
// TODO: Remove beginOffset and lineNumber arguments from OnMboxMarkerReadAsync() in v5.0
121108
await OnMboxMarkerReadAsync (input, startIndex, count, mboxMarkerOffset, mboxMarkerLineNumber, cancellationToken).ConfigureAwait (false);
@@ -179,41 +166,27 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)
179166
}
180167

181168
// Scan ahead a bit to see if this looks like an invalid header.
182-
do {
183-
unsafe {
184-
fixed (byte* inbuf = input) {
185-
if (TryDetectInvalidHeader (inbuf, out invalid, out fieldNameLength, out headerFieldLength))
186-
break;
187-
}
188-
}
189-
169+
while (!TryDetectInvalidHeader (out invalid, out fieldNameLength, out headerFieldLength)) {
190170
int atleast = (inputEnd - inputIndex) + 1;
191171

192172
if (await ReadAheadAsync (atleast, 0, cancellationToken).ConfigureAwait (false) < atleast) {
193173
// Not enough input to even find the ':'... mark as invalid and continue?
194174
invalid = true;
195175
break;
196176
}
197-
} while (true);
177+
}
198178

199179
if (invalid) {
200180
// Figure out why this is an invalid header.
201181

202182
if (input[inputIndex] == (byte) '-') {
203183
// Check for a boundary marker. If the message is properly formatted, this will NEVER happen.
204-
do {
205-
unsafe {
206-
fixed (byte* inbuf = input) {
207-
if (TryCheckBoundaryWithinHeaderBlock (inbuf))
208-
break;
209-
}
210-
}
211-
184+
while (!TryCheckBoundaryWithinHeaderBlock ()) {
212185
int atleast = (inputEnd - inputIndex) + 1;
213186

214187
if (await ReadAheadAsync (atleast, 0, cancellationToken).ConfigureAwait (false) < atleast)
215188
break;
216-
} while (true);
189+
}
217190

218191
// Note: If a boundary was discovered, then the state will be updated to MimeParserState.Boundary.
219192
if (state == MimeParserState.Boundary)
@@ -222,19 +195,12 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)
222195
// Fall through and act as if we're consuming a header.
223196
} else if (input[inputIndex] == (byte) 'F' || input[inputIndex] == (byte) '>') {
224197
// Check for an mbox-style From-line. Again, if the message is properly formatted and not truncated, this will NEVER happen.
225-
do {
226-
unsafe {
227-
fixed (byte* inbuf = input) {
228-
if (TryCheckMboxMarkerWithinHeaderBlock (inbuf))
229-
break;
230-
}
231-
}
232-
198+
while (!TryCheckMboxMarkerWithinHeaderBlock ()) {
233199
int atleast = (inputEnd - inputIndex) + 1;
234200

235201
if (await ReadAheadAsync (atleast, 0, cancellationToken).ConfigureAwait (false) < atleast)
236202
break;
237-
} while (true);
203+
}
238204

239205
// state will be one of the following values:
240206
// 1. Complete: This means that we've found an actual mbox marker
@@ -262,20 +228,13 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)
262228
bool midline = true;
263229

264230
// Consume the header value.
265-
do {
266-
unsafe {
267-
fixed (byte* inbuf = input) {
268-
if (StepHeaderValue (inbuf, ref midline))
269-
break;
270-
}
271-
}
272-
231+
while (!StepHeaderValue (ref midline)) {
273232
if (await ReadAheadAsync (1, 0, cancellationToken).ConfigureAwait (false) == 0) {
274233
state = MimeParserState.Content;
275234
eof = true;
276235
break;
277236
}
278-
} while (true);
237+
}
279238

280239
if (toplevel && headerCount == 0 && invalid && !IsMboxMarker (headerBuffer)) {
281240
state = MimeParserState.Error;
@@ -297,19 +256,13 @@ async Task<bool> SkipBoundaryMarkerAsync (string boundary, bool endBoundary, Can
297256
long beginOffset = GetOffset (inputIndex);
298257
int beginLineNumber = lineNumber;
299258
int startIndex = inputIndex;
300-
bool result;
301259

302260
if (endBoundary)
303261
await OnMultipartEndBoundaryBeginAsync (beginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false);
304262
else
305263
await OnMultipartBoundaryBeginAsync (beginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false);
306264

307-
unsafe {
308-
fixed (byte* inbuf = input) {
309-
result = SkipBoundaryMarkerInternal (inbuf, endBoundary);
310-
}
311-
}
312-
265+
var result = SkipBoundaryMarkerInternal (endBoundary);
313266
int count = inputIndex - startIndex;
314267

315268
if (endBoundary)
@@ -380,11 +333,7 @@ async Task<ScanContentResult> ScanContentAsync (ScanContentType type, long begin
380333

381334
int contentIndex = inputIndex;
382335

383-
unsafe {
384-
fixed (byte* inbuf = input) {
385-
incomplete = ScanContent (inbuf, ref midline, ref formats);
386-
}
387-
}
336+
incomplete = ScanContent (ref midline, ref formats);
388337

389338
if (contentIndex < inputIndex) {
390339
switch (type) {
@@ -447,23 +396,11 @@ async Task<int> ConstructMessagePartAsync (int depth, CancellationToken cancella
447396
return 0;
448397
}
449398

450-
unsafe {
451-
fixed (byte* inbuf = input) {
452-
byte* start = inbuf + inputIndex;
453-
byte* inend = inbuf + inputEnd;
454-
byte* inptr = start;
455-
456-
*inend = (byte) '\n';
457-
458-
inptr = EndOfLine (inptr, inend + 1);
459-
460-
// Note: This isn't obvious, but if the "boundary" that was found is an Mbox "From " line, then
461-
// either the current stream offset is >= contentEnd -or- RespectContentLength is false. It will
462-
// *never* be an Mbox "From " marker in Entity mode.
463-
if ((boundary = CheckBoundary (inputIndex, start, (int) (inptr - start))) != BoundaryType.None)
464-
return GetLineCount (beginLineNumber, beginOffset, GetEndOffset (inputIndex));
465-
}
466-
}
399+
// Note: This isn't obvious, but if the "boundary" that was found is an Mbox "From " line, then
400+
// either the current stream offset is >= contentEnd -or- RespectContentLength is false. It will
401+
// *never* be an Mbox "From " marker in Entity mode.
402+
if ((boundary = CheckBoundary ()) != BoundaryType.None)
403+
return GetLineCount (beginLineNumber, beginOffset, GetEndOffset (inputIndex));
467404
}
468405

469406
// Note: When parsing non-toplevel parts, the header parser will never result in the Error state.

0 commit comments

Comments
 (0)