Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
JoshClose committed Jun 17, 2024
1 parent e84db05 commit 1b66496
Show file tree
Hide file tree
Showing 11 changed files with 162 additions and 23 deletions.
23 changes: 15 additions & 8 deletions src/CsvHelper/Configuration/ConfigurationFunctions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -180,11 +180,18 @@ public static string GetDynamicPropertyName(GetDynamicPropertyNameArgs args)
/// Return the detected delimiter or null if one wasn't found.
/// </summary>
/// <param name="args">The args.</param>
public static string GetDelimiter(GetDelimiterArgs args)
public static char GetDelimiter(GetDelimiterArgs args)
{
var text = args.Text;
var text = args.Text.ToString();
var config = args.Configuration;

if (args.Configuration.CultureInfo.TextInfo.ListSeparator.Length > 1)
{
throw new ConfigurationException($"The CultureInfo.TextInfo.ListSeparator ");
}

var listSeparator = config.CultureInfo.TextInfo.ListSeparator[0];

if (config.Mode == CsvMode.RFC4180)
{
// Remove text in between pairs of quotes.
Expand All @@ -202,7 +209,7 @@ public static string GetDelimiter(GetDelimiterArgs args)
newLine = "\r\n|\r|\n";
}

var lineDelimiterCounts = new List<Dictionary<string, int>>();
var lineDelimiterCounts = new List<Dictionary<char, int>>();
while (text.Length > 0)
{
// Since all escaped text has been removed, we can reliably read line by line.
Expand All @@ -211,11 +218,11 @@ public static string GetDelimiter(GetDelimiterArgs args)

if (line.Length > 0)
{
var delimiterCounts = new Dictionary<string, int>();
var delimiterCounts = new Dictionary<char, int>();
foreach (var delimiter in config.DetectDelimiterValues)
{
// Escape regex special chars to use as regex pattern.
var pattern = Regex.Replace(delimiter, @"([.$^{\[(|)*+?\\])", "\\$1");
var pattern = Regex.Replace(delimiter.ToString(), @"([.$^{\[(|)*+?\\])", "\\$1");
delimiterCounts[delimiter] = Regex.Matches(line, pattern).Count;
}

Expand Down Expand Up @@ -247,11 +254,11 @@ orderby sum descending
}
).ToList();

string? newDelimiter = null;
if (delimiters.Any(x => x.Delimiter == config.CultureInfo.TextInfo.ListSeparator) && lineDelimiterCounts.Count > 1)
char? newDelimiter = null;
if (delimiters.Any(x => x.Delimiter == listSeparator) && lineDelimiterCounts.Count > 1)
{
// The culture's separator is on every line. Assume this is the delimiter.
newDelimiter = config.CultureInfo.TextInfo.ListSeparator;
newDelimiter = listSeparator;
}
else
{
Expand Down
25 changes: 24 additions & 1 deletion src/CsvHelper/Configuration/CsvOptions.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,34 @@
namespace CsvHelper.Configuration;

/// <summary>
/// Common configuration options for reading and writing CSV files.
/// </summary>
public abstract record CsvOptions
{
public char Delimiter { get; init; } = ',';
/// <summary>
/// The mode.
/// See <see cref="CsvMode"/> for more details.
/// </summary>
public CsvMode Mode { get; init; }

/// <summary>
/// The delimiter used to separate fields.
/// Default is ,.
/// If you need a multi-character delimiter, use <see cref="ReplaceTextReader"/> to replace your delimiter with a single character.
/// </summary>
public char Delimiter { get; internal set; } = ',';

/// <summary>
/// The character used to escape characters.
/// Default is '"'.
/// </summary>
public char Escape { get; init; } = '\"';

/// <summary>
/// The newline character to use.
/// If not set, the parser uses one of \r\n, \r, or \n.
/// If you need a multi-character newline, use <see cref="ReplaceTextReader"/> to replace your newline with a single character.
/// </summary>
public char? NewLine { get; init; }

internal int BufferSize = 0x1000;
Expand Down
24 changes: 22 additions & 2 deletions src/CsvHelper/Configuration/CsvParserOptions.cs
Original file line number Diff line number Diff line change
@@ -1,13 +1,33 @@
namespace CsvHelper.Configuration;

/// <summary>
/// Configuration options used for <see cref="CsvParser"/>.
/// </summary>
public record CsvParserOptions : CsvOptions
{
public CsvMode Mode { get; init; }

/// <summary>
/// Cache fields that are created when parsing.
/// Default is false.
/// </summary>
public bool CacheFields { get; init; }

/// <summary>
/// Strategy used for parsing.
/// Defaults to the highest performance your framework and CPU supports.
/// </summary>
public ParsingStrategy? ParsingStrategy { get; init; }

/// <summary>
/// Detect the delimiter instead of using the delimiter from configuration.
/// Default is <c>false</c>.
/// </summary>
public bool DetectDelimiter { get; init; }

/// <summary>
/// The function that is called when <see cref="DetectDelimiter"/> is enabled.
/// </summary>
public GetDelimiter GetDelimiter { get; set; } = ConfigurationFunctions.GetDelimiter;

internal StringCreator StringCreator = (chars, i)
#if NET6_0_OR_GREATER || NETSTANDARD2_1_OR_GREATER
=> new string(chars);
Expand Down
12 changes: 10 additions & 2 deletions src/CsvHelper/Configuration/CsvSerializerOptions.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
namespace CsvHelper.Configuration;

/// <summary>
/// A function that is used to determine if a field should get escaped when writing.
/// </summary>
/// <param name="field">The field.</param>
/// <returns><c>true</c> if the field should get escaped; otherwise <c>false</c>.</returns>
public delegate bool ShouldEscape(ReadOnlySpan<char> field);

/// <summary>
/// Configuration options used for <see cref="CsvSerializer"/>.
/// </summary>
public record CsvSerializerOptions : CsvOptions
{
public CsvMode Mode { get; init; }

/// <summary>
/// A function that is used to determine if a field should get escaped when writing.
/// </summary>
public ShouldEscape? ShouldEscape { get; init; }

internal CsvModeEscape ModeEscape = ModeRfc4180.Escape;
Expand Down
4 changes: 2 additions & 2 deletions src/CsvHelper/Configuration/IParserConfiguration.cs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ public interface IParserConfiguration
/// The delimiter used to separate fields.
/// Default is <see cref="TextInfo.ListSeparator"/>.
/// </summary>
string Delimiter { get; }
char Delimiter { get; }

/// <summary>
/// Detect the delimiter instead of using the delimiter from configuration.
Expand All @@ -139,7 +139,7 @@ public interface IParserConfiguration
/// The possible delimiter values used when detecting the delimiter.
/// Default is [',', ';', '|', '\t'].
/// </summary>
string[] DetectDelimiterValues { get; }
char[] DetectDelimiterValues { get; }

/// <summary>
/// The character used to escape characters.
Expand Down
26 changes: 26 additions & 0 deletions src/CsvHelper/CsvParser.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using CsvHelper.Configuration;
using System.Buffers;
using System.Collections.Concurrent;
using System.Runtime.CompilerServices;

namespace CsvHelper;

Expand All @@ -14,6 +15,8 @@ public class CsvParser : IParser, IDisposable
private bool isDisposed;
private int rowNumber;
private bool leaveOpen;
private bool detectDelimiter;
private bool delimiterDetected;

private CsvParserState state;

Expand Down Expand Up @@ -55,6 +58,8 @@ public CsvParser(TextReader reader, Func<CsvParserOptions, CsvParserOptions> con
options = configure(new CsvParserOptions());
options.Validate();

detectDelimiter = options.DetectDelimiter;

switch (options.Mode)
{
case CsvMode.RFC4180:
Expand Down Expand Up @@ -103,6 +108,16 @@ public bool MoveNext()
return false;
}

if (!delimiterDetected)
{
if (detectDelimiter)
{
options.Delimiter = options.GetDelimiter(state.buffer);
}

delimiterDetected = true;
}

state.Parse();
state.NextRow();
}
Expand Down Expand Up @@ -224,4 +239,15 @@ protected virtual void Dispose(bool isDisposing)
// Set large fields to null
isDisposed = true;
}

/// <summary>
/// Resolves the delimiter at most once per parser instance.
/// When delimiter detection is enabled, the result of the configured
/// <c>GetDelimiter</c> function is stored in the options; either way the
/// parser is marked so the check is not repeated.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void DetectDelimiter()
{
    // Detection has already been handled; nothing left to do.
    if (delimiterDetected)
    {
        return;
    }

    if (detectDelimiter)
    {
        // NOTE(review): same call as the inline detection in MoveNext —
        // confirm the argument type the GetDelimiter delegate expects.
        options.Delimiter = options.GetDelimiter(state.buffer);
    }

    // Mark as handled even when detection is disabled so later calls return immediately.
    delimiterDetected = true;
}
}
28 changes: 28 additions & 0 deletions src/CsvHelper/CsvSerializer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,33 @@

namespace CsvHelper;

/// <summary>
/// Serializes objects into CSV records.
/// </summary>
public class CsvSerializer : IDisposable
{
private TextWriter writer;
private CsvSerializerOptions options;
private CsvSerializerState state;
private bool isDisposed;

/// <summary>
/// Gets the current row number as tracked by the serializer state.
/// </summary>
public int Row
{
    get { return state.row; }
}

/// <summary>
/// Creates a new <see cref="CsvSerializer"/> for the given writer,
/// passing an identity callback so the options are used as supplied.
/// </summary>
/// <param name="writer">The writer.</param>
public CsvSerializer(TextWriter writer)
    : this(writer, unchanged => unchanged)
{
}

/// <summary>
/// Initializes a new instance of the <see cref="CsvSerializer"/> class.
/// </summary>
/// <param name="writer">The writer.</param>
/// <param name="configureOptions">Configure options.</param>
/// <exception cref="NotSupportedException"></exception>
public CsvSerializer(TextWriter writer, Func<CsvSerializerOptions, CsvSerializerOptions> configureOptions)
{
this.writer = writer;
Expand All @@ -38,28 +54,40 @@ public CsvSerializer(TextWriter writer, Func<CsvSerializerOptions, CsvSerializer
state = new CsvSerializerState(writer, options);
}

/// <summary>
/// Writes the given field by forwarding it to the serializer state.
/// </summary>
/// <param name="field">The field to write.</param>
public void Write(ReadOnlySpan<char> field) => state.Write(field);

/// <summary>
/// Advances to the next record by forwarding to the serializer state.
/// </summary>
public void MoveNext() => state.MoveNext();

/// <summary>
/// Flushes the buffer to the writer by forwarding to the serializer state.
/// </summary>
public void Flush() => state.Flush();

/// <inheritdoc />
public void Dispose()
{
// Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
Dispose(isDisposing: true);
// Request that the runtime not call the finalizer for this instance.
GC.SuppressFinalize(this);
}

/// <inheritdoc />
protected virtual void Dispose(bool isDisposing)
{
if (isDisposed)
Expand Down
11 changes: 5 additions & 6 deletions src/CsvHelper/Delegates/GetDelimiter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,26 @@
// This file is a part of CsvHelper and is dual licensed under MS-PL and Apache 2.0.
// See LICENSE.txt for details or visit http://www.opensource.org/licenses/ms-pl.html for MS-PL and http://opensource.org/licenses/Apache-2.0 for Apache 2.0.
// https://github.com/JoshClose/CsvHelper
using CsvHelper.Configuration;

namespace CsvHelper.Delegates;
namespace CsvHelper.Configuration;

/// <summary>
/// Function that resolves the delimiter from the given text.
/// Returns null if no delimiter is found.
/// </summary>
/// <param name="args"></param>
/// <returns></returns>
public delegate string GetDelimiter(GetDelimiterArgs args);
public delegate char GetDelimiter(GetDelimiterArgs args);

/// <summary>
/// GetDelimiter args.
/// </summary>
public readonly struct GetDelimiterArgs
public readonly ref struct GetDelimiterArgs
{
/// <summary>
/// The text to resolve the delimiter from.
/// </summary>
public readonly string Text;
public readonly ReadOnlySpan<char> Text;

/// <summary>
/// The configuration.
Expand All @@ -34,7 +33,7 @@ public readonly struct GetDelimiterArgs
/// </summary>
/// <param name="text">The text to resolve the delimiter from.</param>
/// <param name="configuration">The configuration.</param>
public GetDelimiterArgs(string text, IParserConfiguration configuration)
public GetDelimiterArgs(ReadOnlySpan<char> text, IParserConfiguration configuration)
{
Text = text;
Configuration = configuration;
Expand Down
2 changes: 1 addition & 1 deletion src/CsvHelper/IParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
namespace CsvHelper;

/// <summary>
/// Defines functionality used the parse a CSV file.
/// Defines functionality used to parse records.
/// </summary>
public interface IParser : IDisposable
{
Expand Down
28 changes: 28 additions & 0 deletions src/CsvHelper/ISerializer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
namespace CsvHelper;

// NOTE(review): CsvSerializer exposes Row, Write, MoveNext and Flush but is
// declared as IDisposable only — confirm whether it should implement ISerializer.
/// <summary>
/// Defines functionality used to serialize records.
/// Extends <see cref="IDisposable"/>.
/// </summary>
public interface ISerializer : IDisposable
{
/// <summary>
/// Current row number.
/// </summary>
int Row { get; }

/// <summary>
/// Writes the given field.
/// </summary>
/// <param name="field">The field to write.</param>
void Write(ReadOnlySpan<char> field);

/// <summary>
/// Moves to the next record.
/// </summary>
void MoveNext();

/// <summary>
/// Flushes the buffer to the writer.
/// </summary>
void Flush();
}
2 changes: 1 addition & 1 deletion src/CsvHelper/TypeConversion/IEnumerableConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public class IEnumerableConverter : DefaultTypeConverter
/// <param name="row"></param>
/// <param name="memberMapData"></param>
/// <returns>The string representation of the object.</returns>
public override string? ConvertToString(object? value, IWriterRow row, MemberMapData memberMapData)
public override ReadOnlySpan<char> ConvertToString(object? value, IWriterRow row, MemberMapData memberMapData)
{
var list = value as IEnumerable;
if (list == null)
Expand Down

0 comments on commit 1b66496

Please sign in to comment.