diff --git a/src/CsvHelper/Configuration/ConfigurationFunctions.cs b/src/CsvHelper/Configuration/ConfigurationFunctions.cs index 99ed29616..a71f946ee 100644 --- a/src/CsvHelper/Configuration/ConfigurationFunctions.cs +++ b/src/CsvHelper/Configuration/ConfigurationFunctions.cs @@ -180,11 +180,18 @@ public static string GetDynamicPropertyName(GetDynamicPropertyNameArgs args) /// Return the detected delimiter or null if one wasn't found. /// /// The args. - public static string GetDelimiter(GetDelimiterArgs args) + public static char GetDelimiter(GetDelimiterArgs args) { - var text = args.Text; + var text = args.Text.ToString(); var config = args.Configuration; + if (args.Configuration.CultureInfo.TextInfo.ListSeparator.Length > 1) + { + throw new ConfigurationException($"The CultureInfo.TextInfo.ListSeparator "); + } + + var listSeparator = config.CultureInfo.TextInfo.ListSeparator[0]; + if (config.Mode == CsvMode.RFC4180) { // Remove text in between pairs of quotes. @@ -202,7 +209,7 @@ public static string GetDelimiter(GetDelimiterArgs args) newLine = "\r\n|\r|\n"; } - var lineDelimiterCounts = new List>(); + var lineDelimiterCounts = new List>(); while (text.Length > 0) { // Since all escaped text has been removed, we can reliably read line by line. @@ -211,11 +218,11 @@ public static string GetDelimiter(GetDelimiterArgs args) if (line.Length > 0) { - var delimiterCounts = new Dictionary(); + var delimiterCounts = new Dictionary(); foreach (var delimiter in config.DetectDelimiterValues) { // Escape regex special chars to use as regex pattern. - var pattern = Regex.Replace(delimiter, @"([.$^{\[(|)*+?\\])", "\\$1"); + var pattern = Regex.Replace(delimiter.ToString(), @"([.$^{\[(|)*+?\\])", "\\$1"); delimiterCounts[delimiter] = Regex.Matches(line, pattern).Count; } @@ -247,11 +254,11 @@ orderby sum descending } ).ToList(); - string? 
newDelimiter = null; - if (delimiters.Any(x => x.Delimiter == config.CultureInfo.TextInfo.ListSeparator) && lineDelimiterCounts.Count > 1) + char? newDelimiter = null; + if (delimiters.Any(x => x.Delimiter == listSeparator) && lineDelimiterCounts.Count > 1) { // The culture's separator is on every line. Assume this is the delimiter. - newDelimiter = config.CultureInfo.TextInfo.ListSeparator; + newDelimiter = listSeparator; } else { diff --git a/src/CsvHelper/Configuration/CsvOptions.cs b/src/CsvHelper/Configuration/CsvOptions.cs index 2b2878ee9..b0bbd77bf 100644 --- a/src/CsvHelper/Configuration/CsvOptions.cs +++ b/src/CsvHelper/Configuration/CsvOptions.cs @@ -1,11 +1,34 @@ namespace CsvHelper.Configuration; +/// +/// Common configuration options for reading and writing CSV files. +/// public abstract record CsvOptions { - public char Delimiter { get; init; } = ','; + /// + /// The mode. + /// See for more details. + /// + public CsvMode Mode { get; init; } + /// + /// The delimiter used to separate fields. + /// Default is ,. + /// If you need a multi-character delimiter, use to replace your delimiter with a single character. + /// + public char Delimiter { get; internal set; } = ','; + + /// + /// The character used to escape characters. + /// Default is '"'. + /// public char Escape { get; init; } = '\"'; + /// + /// The newline character to use. + /// If not set, the parser uses one of \r\n, \r, or \n. + /// If you need a multi-character newline, use to replace your newline with a single character. + /// public char? NewLine { get; init; } internal int BufferSize = 0x1000; diff --git a/src/CsvHelper/Configuration/CsvParserOptions.cs b/src/CsvHelper/Configuration/CsvParserOptions.cs index 101f1106a..289b1c677 100644 --- a/src/CsvHelper/Configuration/CsvParserOptions.cs +++ b/src/CsvHelper/Configuration/CsvParserOptions.cs @@ -1,13 +1,33 @@ namespace CsvHelper.Configuration; +/// +/// Configuration options used for . 
+/// public record CsvParserOptions : CsvOptions { - public CsvMode Mode { get; init; } - + /// + /// Cache fields that are created when parsing. + /// Default is false. + /// public bool CacheFields { get; init; } + /// + /// Strategy used for parsing. + /// Defaults to the highest performance your framework and CPU supports. + /// public ParsingStrategy? ParsingStrategy { get; init; } + /// + /// Detect the delimiter instead of using the delimiter from configuration. + /// Default is false. + /// + public bool DetectDelimiter { get; init; } + + /// + /// The function that is called when is enabled. + /// + public GetDelimiter GetDelimiter { get; set; } = ConfigurationFunctions.GetDelimiter; + internal StringCreator StringCreator = (chars, i) #if NET6_0_OR_GREATER || NETSTANDARD2_1_OR_GREATER => new string(chars); diff --git a/src/CsvHelper/Configuration/CsvSerializerOptions.cs b/src/CsvHelper/Configuration/CsvSerializerOptions.cs index 3054c1555..9e466219f 100644 --- a/src/CsvHelper/Configuration/CsvSerializerOptions.cs +++ b/src/CsvHelper/Configuration/CsvSerializerOptions.cs @@ -1,11 +1,19 @@ namespace CsvHelper.Configuration; +/// +/// A function that is used to determine if a field should get escaped when writing. +/// +/// The field. public delegate bool ShouldEscape(ReadOnlySpan field); +/// +/// Configuration options used for . +/// public record CsvSerializerOptions : CsvOptions { - public CsvMode Mode { get; init; } - + /// + /// A function that is used to determine if a field should get escaped when writing. + /// public ShouldEscape? 
ShouldEscape { get; init; } internal CsvModeEscape ModeEscape = ModeRfc4180.Escape; diff --git a/src/CsvHelper/Configuration/IParserConfiguration.cs b/src/CsvHelper/Configuration/IParserConfiguration.cs index 16da49f32..f61df8ce8 100644 --- a/src/CsvHelper/Configuration/IParserConfiguration.cs +++ b/src/CsvHelper/Configuration/IParserConfiguration.cs @@ -122,7 +122,7 @@ public interface IParserConfiguration /// The delimiter used to separate fields. /// Default is . /// - string Delimiter { get; } + char Delimiter { get; } /// /// Detect the delimiter instead of using the delimiter from configuration. @@ -139,7 +139,7 @@ public interface IParserConfiguration /// The possible delimiter values used when detecting the delimiter. /// Default is [",", ";", "|", "\t"]. /// - string[] DetectDelimiterValues { get; } + char[] DetectDelimiterValues { get; } /// /// The character used to escape characters. diff --git a/src/CsvHelper/CsvParser.cs b/src/CsvHelper/CsvParser.cs index 41db87cd0..677a0d9a4 100644 --- a/src/CsvHelper/CsvParser.cs +++ b/src/CsvHelper/CsvParser.cs @@ -1,6 +1,7 @@ using CsvHelper.Configuration; using System.Buffers; using System.Collections.Concurrent; +using System.Runtime.CompilerServices; namespace CsvHelper; @@ -14,6 +15,8 @@ public class CsvParser : IParser, IDisposable private bool isDisposed; private int rowNumber; private bool leaveOpen; + private bool detectDelimiter; + private bool delimiterDetected; private CsvParserState state; @@ -55,6 +58,8 @@ public CsvParser(TextReader reader, Func con options = configure(new CsvParserOptions()); options.Validate(); + detectDelimiter = options.DetectDelimiter; + switch (options.Mode) { case CsvMode.RFC4180: @@ -103,6 +108,16 @@ public bool MoveNext() return false; } + if (!delimiterDetected) + { + if (detectDelimiter) + { + options.Delimiter = options.GetDelimiter(state.buffer); + } + + delimiterDetected = true; + } + state.Parse(); state.NextRow(); } @@ -224,4 +239,15 @@ protected virtual void 
Dispose(bool isDisposing) // Set large fields to null isDisposed = true; } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void DetectDelimiter() + { + if (delimiterDetected || ) + { + return; + } + + + } } diff --git a/src/CsvHelper/CsvSerializer.cs b/src/CsvHelper/CsvSerializer.cs index 2f7187ca4..42acd578a 100644 --- a/src/CsvHelper/CsvSerializer.cs +++ b/src/CsvHelper/CsvSerializer.cs @@ -2,6 +2,9 @@ namespace CsvHelper; +/// +/// Serializes objects into CSV records. +/// public class CsvSerializer : IDisposable { private TextWriter writer; @@ -9,10 +12,23 @@ public class CsvSerializer : IDisposable private CsvSerializerState state; private bool isDisposed; + /// + /// Current row number. + /// public int Row => state.row; + /// + /// Initializes a new instance of the class. + /// + /// The writer. public CsvSerializer(TextWriter writer) : this(writer, options => options) { } + /// + /// Initializes a new instance of the class. + /// + /// The writer. + /// Configure options. + /// public CsvSerializer(TextWriter writer, Func configureOptions) { this.writer = writer; @@ -38,21 +54,32 @@ public CsvSerializer(TextWriter writer, Func + /// Writes the given field. + /// + /// The field to write. public void Write(ReadOnlySpan field) { state.Write(field); } + /// + /// Moves to the next record. + /// public void MoveNext() { state.MoveNext(); } + /// + /// Flushes the buffer to the writer. + /// public void Flush() { state.Flush(); } + /// public void Dispose() { // Do not change this code. 
Put cleanup code in 'Dispose(bool disposing)' method @@ -60,6 +87,7 @@ public void Dispose() GC.SuppressFinalize(this); } + /// protected virtual void Dispose(bool isDisposing) { if (isDisposed) diff --git a/src/CsvHelper/Delegates/GetDelimiter.cs b/src/CsvHelper/Delegates/GetDelimiter.cs index 293c103eb..de00f3886 100644 --- a/src/CsvHelper/Delegates/GetDelimiter.cs +++ b/src/CsvHelper/Delegates/GetDelimiter.cs @@ -2,9 +2,8 @@ // This file is a part of CsvHelper and is dual licensed under MS-PL and Apache 2.0. // See LICENSE.txt for details or visit http://www.opensource.org/licenses/ms-pl.html for MS-PL and http://opensource.org/licenses/Apache-2.0 for Apache 2.0. // https://github.com/JoshClose/CsvHelper -using CsvHelper.Configuration; -namespace CsvHelper.Delegates; +namespace CsvHelper.Configuration; /// /// Function that resolves the delimiter from the given text. @@ -12,17 +11,17 @@ namespace CsvHelper.Delegates; /// /// /// -public delegate string GetDelimiter(GetDelimiterArgs args); +public delegate char GetDelimiter(GetDelimiterArgs args); /// /// GetDelimiter args. /// -public readonly struct GetDelimiterArgs +public readonly ref struct GetDelimiterArgs { /// /// The text to resolve the delimiter from. /// - public readonly string Text; + public readonly ReadOnlySpan Text; /// /// The configuration. @@ -34,7 +33,7 @@ public readonly struct GetDelimiterArgs /// /// The text to resolve the delimiter from. /// The configuration. - public GetDelimiterArgs(string text, IParserConfiguration configuration) + public GetDelimiterArgs(ReadOnlySpan text, IParserConfiguration configuration) { Text = text; Configuration = configuration; diff --git a/src/CsvHelper/IParser.cs b/src/CsvHelper/IParser.cs index 30c5da390..5ca3fc5dc 100644 --- a/src/CsvHelper/IParser.cs +++ b/src/CsvHelper/IParser.cs @@ -5,7 +5,7 @@ namespace CsvHelper; /// -/// Defines functionality used the parse a CSV file. +/// Defines functionality used to parse records. 
/// public interface IParser : IDisposable { diff --git a/src/CsvHelper/ISerializer.cs b/src/CsvHelper/ISerializer.cs new file mode 100644 index 000000000..66a1f2ec3 --- /dev/null +++ b/src/CsvHelper/ISerializer.cs @@ -0,0 +1,28 @@ +namespace CsvHelper; + +/// +/// Defines functionality used to serialize records. +/// +public interface ISerializer : IDisposable +{ + /// + /// Current row number. + /// + int Row { get; } + + /// + /// Writes the given field. + /// + /// The field to write. + void Write(ReadOnlySpan field); + + /// + /// Moves to the next record. + /// + void MoveNext(); + + /// + /// Flushes the buffer to the writer. + /// + void Flush(); +} diff --git a/src/CsvHelper/TypeConversion/IEnumerableConverter.cs b/src/CsvHelper/TypeConversion/IEnumerableConverter.cs index fb22388cb..ca7c938b1 100644 --- a/src/CsvHelper/TypeConversion/IEnumerableConverter.cs +++ b/src/CsvHelper/TypeConversion/IEnumerableConverter.cs @@ -19,7 +19,7 @@ public class IEnumerableConverter : DefaultTypeConverter /// /// /// The string representation of the object. - public override string? ConvertToString(object? value, IWriterRow row, MemberMapData memberMapData) + public override ReadOnlySpan ConvertToString(object? value, IWriterRow row, MemberMapData memberMapData) { var list = value as IEnumerable; if (list == null)