Skip to content

Commit d1c589e

Browse files
author
Tony Bark
committed
Improved normalization algorithm
1 parent 8462411 commit d1c589e

File tree

4 files changed

+113
-134
lines changed

4 files changed

+113
-134
lines changed

CSTNet/CST.cs

+70-98
Original file line numberDiff line numberDiff line change
@@ -8,107 +8,79 @@ namespace CSTNet;
88

99
public static class CST
1010
{
11-
const char CARET = '^';
12-
const string LF = "\u000A";
13-
const string CR = "\u000D";
14-
const string CRLF = "\u000D\u000A";
15-
const string LS = "\u2028";
16-
17-
/// <summary>
18-
/// Gets the value from the digit-based key.
19-
/// </summary>
20-
/// <returns>Returns the entry</returns>
21-
public static string Parse(string content, int key) => Parse(content, key.ToString());
22-
23-
/// <summary>
24-
/// Gets the value from the string-based key.
25-
/// </summary>
26-
/// <returns>Returns the entry</returns>
27-
public static string Parse(string content, string key)
28-
{
29-
var entries = NormalizeEntries(content);
30-
return GetEntry(entries, key);
31-
}
11+
const char CARET = '^';
12+
const string LF = "\u000A";
13+
const string CR = "\u000D";
14+
const string CRLF = "\u000D\u000A";
15+
const string LS = "\u2028";
16+
17+
/// <summary>
/// Gets the value stored under a numeric key.
/// </summary>
/// <param name="content">The CST document text.</param>
/// <param name="key">The numeric key to look up.</param>
/// <returns>The value returned by the string-keyed overload for the formatted key.</returns>
public static string Parse(string content, int key) =>
    // Format with the invariant culture so the key text does not depend on the
    // current culture's number formatting (for example its negative sign).
    Parse(content, key.ToString(System.Globalization.CultureInfo.InvariantCulture));
22+
23+
/// <summary>
/// Looks up the value stored under a string key.
/// </summary>
/// <param name="content">The CST document text.</param>
/// <param name="key">The key to look up.</param>
/// <returns>The result of searching the normalized entries for <paramref name="key"/>.</returns>
public static string Parse(string content, string key) =>
    GetEntry(NormalizeEntries(content), key);
3232

3333
#if (NET8_0 && DEBUG)
34-
[UnmanagedCallersOnly(EntryPoint = "parse")]
35-
public static IntPtr Parse(IntPtr content, IntPtr key)
36-
{
37-
// => Parse(Marshal.PtrToStringAnsi(content), Marshal.PtrToStringAnsi(key));
38-
var entries = NormalizeEntries(Marshal.PtrToStringAnsi(content));
39-
return Marshal.StringToHGlobalAnsi(GetEntry(entries, Marshal.PtrToStringAnsi(key)));
40-
}
34+
/// <summary>
/// Native entry point ("parse"): looks up a key in a CST document passed as ANSI strings.
/// </summary>
/// <param name="content">Pointer to the ANSI-encoded document text.</param>
/// <param name="key">Pointer to the ANSI-encoded key.</param>
/// <returns>
/// A pointer to an ANSI copy of the entry allocated with <see cref="Marshal.StringToHGlobalAnsi"/>,
/// or <see cref="IntPtr.Zero"/> when either argument cannot be read as a string.
/// This method never frees the returned buffer; it has to be released with
/// <see cref="Marshal.FreeHGlobal"/> once the caller is done with it.
/// </returns>
[UnmanagedCallersOnly(EntryPoint = "parse")]
public static IntPtr Parse(IntPtr content, IntPtr key)
{
    var text = Marshal.PtrToStringAnsi(content);
    var name = Marshal.PtrToStringAnsi(key);

    // PtrToStringAnsi yields null for a null pointer. Bail out instead of letting
    // an exception escape across the unmanaged boundary.
    if (text is null || name is null)
        return IntPtr.Zero;

    return Marshal.StringToHGlobalAnsi(GetEntry(NormalizeEntries(text), name));
}
4141
#endif
42-
43-
/// <summary>
44-
/// Replaces the document's line endings with the native system line endings.
45-
/// </summary>
46-
/// <remarks>This stage ensures there are no crashes during parsing.</remarks>
47-
/// <param name="content">The content of the document.</param>
48-
/// <returns>The document's content with native system line endings.</returns>
49-
static IEnumerable<string> NormalizeEntries(string content)
50-
{
51-
// Check if the document already uses native system line endings.
52-
if (!content.Contains(Environment.NewLine))
53-
{
54-
// If not, check for and replace other line ending types.
55-
if (content.Contains(LF))
56-
content = content.Replace(LF,
57-
Environment.NewLine);
58-
59-
if (content.Contains(CR))
60-
content = content.Replace(CR,
61-
Environment.NewLine);
62-
63-
if (content.Contains(CRLF))
64-
content = content.Replace(CRLF,
65-
Environment.NewLine);
66-
67-
if (content.Contains(LS))
68-
content = content.Replace(LS,
69-
Environment.NewLine);
70-
}
71-
72-
// Split the content by the caret and newline characters.
73-
var lines = content.Split(new[] { $"{CARET}{Environment.NewLine}" },
74-
StringSplitOptions.RemoveEmptyEntries);
75-
76-
// Filter out any lines that start with "//", "#", "/*", or end with "*/".
77-
return lines.Where(line =>
78-
!line.StartsWith("//") &&
79-
!line.StartsWith("#") &&
80-
!line.StartsWith("/*") &&
81-
!line.EndsWith("*/"))
82-
.AsEnumerable();
83-
}
84-
85-
/// <summary>
86-
/// Retrieves the value for the specified key from the given entries.
87-
/// </summary>
88-
/// <param name="entries">The entries to search through.</param>
89-
/// <param name="key">The key to search for.</param>
90-
/// <returns>The value for the specified key, or a default string if not found.</returns>
91-
static string GetEntry(IEnumerable<string> entries, string key)
92-
{
93-
// Iterate through the entries.
94-
foreach (var entry in entries)
95-
{
96-
// If the line doesn't start with the key, keep searching.
97-
if (!entry.StartsWith(key))
98-
continue;
99-
100-
// Locate the index of the caret character.
101-
var startIndex = entry.IndexOf(CARET);
102-
// Get the line from the caret character to the end of the string.
103-
var line = entry[startIndex..];
104-
105-
// Return the line with the caret characters trimmed.
106-
return line.TrimStart(CARET).TrimEnd(CARET);
107-
}
108-
109-
// If no entry is found, return a default string.
110-
return "***MISSING***";
111-
}
42+
/// <summary>
/// Splits CST content into entries after normalizing its line endings, and drops comment entries.
/// </summary>
/// <remarks>
/// CRLF, CR, LF and LS are first collapsed to a single LF and only then converted to
/// <see cref="Environment.NewLine"/>. Replacing each terminator independently would rewrite the
/// CR and LF halves of a CRLF pair separately, leaving stray line breaks at the start of entries.
/// </remarks>
/// <param name="content">The content to normalize.</param>
/// <returns>
/// The pieces of <paramref name="content"/> separated by a caret followed by a line break,
/// excluding empty pieces and pieces that start with "//", "#" or "/*" or end with "*/".
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="content"/> is null.</exception>
public static IEnumerable<string> NormalizeEntries(string content)
{
    ArgumentNullException.ThrowIfNull(content);

    // CRLF has to go first so that a Windows line break counts as one terminator, not two.
    var normalized = content
        .Replace(CRLF, LF)
        .Replace(CR, LF)
        .Replace(LS, LF);

    // Convert to the platform line ending in a single step (a no-op where it already is LF).
    if (Environment.NewLine != LF)
        normalized = normalized.Replace(LF, Environment.NewLine);

    return normalized
        .Split($"{CARET}{Environment.NewLine}", StringSplitOptions.RemoveEmptyEntries)
        .Where(line =>
            !line.StartsWith("//", StringComparison.Ordinal) &&
            !line.StartsWith('#') &&
            !line.StartsWith("/*", StringComparison.Ordinal) &&
            !line.EndsWith("*/", StringComparison.Ordinal));
}
56+
57+
/// <summary>
/// Retrieves the value for the specified key from the given entries.
/// </summary>
/// <remarks>
/// The key of an entry is the text before its first caret, with surrounding whitespace removed.
/// It must equal <paramref name="key"/> exactly (ordinal comparison), so looking up "1" does not
/// return the entry for "10". Entries without a caret carry no value and are skipped.
/// </remarks>
/// <param name="entries">The entries to search through.</param>
/// <param name="key">The key to search for.</param>
/// <returns>
/// The text after the first caret of the first matching entry, with leading and trailing
/// carets removed, or "***MISSING***" if no entry matches.
/// </returns>
static string GetEntry(IEnumerable<string> entries, string key)
{
    foreach (var entry in entries)
    {
        var caretIndex = entry.IndexOf(CARET);

        // No caret means no value; slicing from index -1 would throw.
        if (caretIndex < 0)
            continue;

        // Compare the whole key token rather than a prefix of the entry.
        var entryKey = entry[..caretIndex].Trim();
        if (!string.Equals(entryKey, key, StringComparison.Ordinal))
            continue;

        // Everything from the caret onwards is the value; strip the enclosing carets.
        return entry[caretIndex..].Trim(CARET);
    }

    // If no entry is found, return a default string.
    return "***MISSING***";
}
11284

11385
}
11486

CSTNet/CSTNet.csproj

+19-19
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,25 @@
11
<Project Sdk="Microsoft.NET.Sdk">
22

3-
<PropertyGroup>
4-
<TargetFrameworks>net6.0;net8.0</TargetFrameworks>
5-
<Version>2.1.100</Version>
6-
<Nullable>enable</Nullable>
7-
<LangVersion>latest</LangVersion>
8-
<ImplicitUsings>enable</ImplicitUsings>
9-
<Authors>Tony Bark</Authors>
10-
<PackageDescription>
11-
Caret-Separated Text (or CST) is a key-value pair format represented by digits or words
12-
as keys and the value as text enclosed between carets. ([key] ^[value]^)
3+
<PropertyGroup>
4+
<TargetFrameworks>net6.0;net8.0</TargetFrameworks>
5+
<Version>2.1.101-alpha</Version>
6+
<Nullable>enable</Nullable>
7+
<LangVersion>latest</LangVersion>
8+
<ImplicitUsings>enable</ImplicitUsings>
9+
<Authors>Tony Bark</Authors>
10+
<PackageDescription>
11+
Caret-Separated Text (or CST) is a key-value pair format represented by digits or words
12+
as keys and the value as text enclosed between carets. ([key] ^[value]^)
1313

14-
CSTNet provides you the framework for parsing the CST format.
15-
</PackageDescription>
16-
<RepositoryUrl>https://github.com/tonytins/cstdotnet</RepositoryUrl>
17-
<PackageLicenseExpression>BSD-3-Clause</PackageLicenseExpression>
18-
</PropertyGroup>
14+
CSTNet provides you the framework for parsing the CST format.
15+
</PackageDescription>
16+
<RepositoryUrl>https://github.com/tonytins/cstdotnet</RepositoryUrl>
17+
<PackageLicenseExpression>BSD-3-Clause</PackageLicenseExpression>
18+
</PropertyGroup>
1919

20-
<!-- Support AOT on .NET 8+ -->
21-
<PropertyGroup Condition=" '$(TargetFramework)' == 'net8.0' ">
22-
<IsAotCompatible>true</IsAotCompatible>
23-
</PropertyGroup>
20+
<!-- Support AOT on .NET 8+ -->
21+
<PropertyGroup Condition=" '$(TargetFramework)' == 'net8.0' ">
22+
<IsAotCompatible>true</IsAotCompatible>
23+
</PropertyGroup>
2424

2525
</Project>

CSTNet/UIText.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@ namespace CSTNet;
44

55
public class UIText : IUIText
66
{
7-
/// <summary>
8-
/// The language of the text.
9-
/// </summary>
10-
string Language { get; set; } = "english";
7+
/// <summary>
8+
/// The language of the text.
9+
/// </summary>
10+
string Language { get; set; } = "english";
1111

1212
/// <summary>
1313
/// The base directory for the language files.

README.md

+20-13
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,10 @@
1-
# CST.NET
1+
# CST.NET
22

3-
<p align="center">
4-
<a href="https://github.com/tonytins/cstdotnet/blob/main/LICENSE"><img src="https://img.shields.io/github/license/tonytins/cstdotnet" alt="GitHub license"></a>
5-
<a href="https://github.com/tonytins/cstdotnet/actions?query=workflow%3Abuild.yml"><img src="https://img.shields.io/github/actions/workflow/status/tonytins/cstdotnet/build.yml" alt="GitHub Workflow Status"></a>
6-
<img src="https://img.shields.io/github/commit-activity/w/tonytins/cstdotnet" alt="GitHub commit activity">
7-
<a href="code_of_conduct.md"></br>
8-
<img src="https://img.shields.io/codeclimate/maintainability-percentage/tonytins/cstdotnet" alt="Code Climate maintainability">
9-
<img src="https://img.shields.io/nuget/dt/CSTNet" alt="NuGet Downloads"> <a href="https://www.nuget.org/packages/tonybark.updatetools"><img src="https://img.shields.io/nuget/v/cstnet.svg" /></a></br><img src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg" alt="Contributor Covenant"></a></br>
10-
</p>
3+
<p align="center">
<a href="https://github.com/tonytins/cstdotnet/blob/main/LICENSE"><img src="https://img.shields.io/github/license/tonytins/cstdotnet" alt="GitHub license"></a>
<a href="https://github.com/tonytins/cstdotnet/actions?query=workflow%3Abuild.yml"><img src="https://img.shields.io/github/actions/workflow/status/tonytins/cstdotnet/build.yml" alt="GitHub Workflow Status"></a>
<img src="https://img.shields.io/github/commit-activity/w/tonytins/cstdotnet" alt="GitHub commit activity">
<br>
<img src="https://img.shields.io/codeclimate/maintainability-percentage/tonytins/cstdotnet" alt="Code Climate maintainability">
<img src="https://img.shields.io/nuget/dt/CSTNet" alt="NuGet Downloads"> <a href="https://www.nuget.org/packages/tonybark.updatetools"><img src="https://img.shields.io/nuget/v/cstnet.svg" alt="NuGet version"></a>
<br>
<a href="code_of_conduct.md"><img src="https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg" alt="Contributor Covenant"></a>
</p>
114

12-
CST.NET is a library for parsing Maxis' key-value pair format. It can be used in conjunction with your own custom frameworks, or the original ``UIText`` APIs.
5+
CST.NET is a library for parsing Maxis' key-value pair format. It can be used in conjunction with your own custom frameworks, or the original `UIText` APIs.
136

14-
Caret-Separated Text (or CST) is a key-value pair format represented by digits or words as keys and the value as text enclosed between carets. (e.g. ``<key> ^<text>^``) Any text which is not enclosed with carets is considered a comment and ignored. Neither strings nor comments may use the caret character.
7+
Caret-Separated Text (or CST) is a key-value pair format represented by digits or words as keys and the value as text enclosed between carets. (e.g. `<key> ^<text>^`) Any text which is not enclosed with carets is considered a comment and ignored. Neither strings nor comments may use the caret character.
158

169
## Changelog
1710

@@ -23,7 +16,7 @@ See [/docs](./doc/README.md).
2316

2417
## To-do
2518

26-
- [ ] Support for parameters (e.g. ``%1``)
19+
- [ ] Support for parameters (e.g. `%1`)
2720

2821
## Known issues
2922

@@ -38,6 +31,20 @@ See [/docs](./doc/README.md).
3831
- [.NET Interactive](https://github.com/dotnet/interactive/blob/main/README.md) for notebooks (optional).
3932
- [VSCode Extension](https://marketplace.visualstudio.com/items?itemName=ms-dotnettools.dotnet-interactive-vscode) or [nteract](https://nteract.io/).
4033

34+
35+
## Contributing
36+
37+
You can contribute to CST.NET by testing cutting-edge features in the latest releases, filing bugs, and joining in the discussion on our forums!
38+
39+
* [Getting Started](https://github.com/tonytins/cstdotnet/wiki)
40+
* [Project Structure](https://github.com/tonytins/cstdotnet/wiki/Project-structure)
41+
* [Coding Standards](https://github.com/tonytins/cstdotnet/wiki/Coding-standards)
42+
* [Pull Requests](https://github.com/tonytins/cstdotnet/pulls): [Open](https://github.com/tonytins/cstdotnet/pulls)/[Closed](https://github.com/tonytins/cstdotnet/issues?q=is%3Apr+is%3Aclosed)
43+
44+
Looking for something to do? Check out the issues tagged as [help wanted](https://github.com/tonytins/cstdotnet/labels/help%20wanted) to get started.
45+
46+
Regarding translations, full object and UI translations should currently be released on the forums. This is far from perfect and is due to be reworked. Stay tuned!
47+
4148
## License
4249

43-
I license this project under the BSD-3-Clause license - see [LICENSE](LICENSE) for details.
50+
I license this project under the BSD-3-Clause license - see [LICENSE](LICENSE) for details.

0 commit comments

Comments
 (0)