Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tokenizer supports multiple encodings, compatible with .NET Standard 2.0 #218

Open
wants to merge 4 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 19 additions & 17 deletions OpenAI.Playground/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,22 @@
using OpenAI.GPT3.Interfaces;
using OpenAI.Playground.TestHelpers;

var builder = new ConfigurationBuilder()
.AddJsonFile("ApiSettings.json")
.AddUserSecrets<Program>();
//var builder = new ConfigurationBuilder()
// .AddJsonFile("ApiSettings.json")
// .AddUserSecrets<Program>();

IConfiguration configuration = builder.Build();
var serviceCollection = new ServiceCollection();
serviceCollection.AddScoped(_ => configuration);
//IConfiguration configuration = builder.Build();
//var serviceCollection = new ServiceCollection();
//serviceCollection.AddScoped(_ => configuration);

#if NET6_0_OR_GREATER
// Laser Cat Eyes is a tool that shows the requests and responses exchanged between the OpenAI server and your client.
// Get your app key from https://lasercateyes.com for FREE and put it in ApiSettings.json or secrets.json.
// It is in beta; if you don't want to use it, just comment out the line below.
serviceCollection.AddLaserCatEyesHttpClientListener();
//serviceCollection.AddLaserCatEyesHttpClientListener();
#endif

serviceCollection.AddOpenAIService();
//serviceCollection.AddOpenAIService();
//// DeploymentId and ResourceName are only for Azure OpenAI. If you want to use Azure OpenAI services you have to set Provider type To Azure.
//serviceCollection.AddOpenAIService(options =>
//{
Expand All @@ -32,21 +32,21 @@
// options.ResourceName = "MyResourceName";
//});

var serviceProvider = serviceCollection.BuildServiceProvider();
var sdk = serviceProvider.GetRequiredService<IOpenAIService>();
//var serviceProvider = serviceCollection.BuildServiceProvider();
//var sdk = serviceProvider.GetRequiredService<IOpenAIService>();

// CHAT GPT
// |-----------------------------------------------------------------------|
// | o \ o / _ o __| \ / |__ o _ \ o / o |
// | /|\ | /\ ___\o \o | o/ o/__ /\ | /|\ |
// | / \ / \ | \ /) | ( \ /o\ / ) | (\ / | / \ / \ |
// |-----------------------------------------------------------------------|
await ChatCompletionTestHelper.RunSimpleChatCompletionTest(sdk);
await ChatCompletionTestHelper.RunSimpleCompletionStreamTest(sdk);
//await ChatCompletionTestHelper.RunSimpleChatCompletionTest(sdk);
//await ChatCompletionTestHelper.RunSimpleCompletionStreamTest(sdk);

// Whisper
await AudioTestHelper.RunSimpleAudioCreateTranscriptionTest(sdk);
await AudioTestHelper.RunSimpleAudioCreateTranslationTest(sdk);
//await AudioTestHelper.RunSimpleAudioCreateTranscriptionTest(sdk);
//await AudioTestHelper.RunSimpleAudioCreateTranslationTest(sdk);

//await ModelTestHelper.FetchModelsTest(sdk);
//await EditTestHelper.RunSimpleEditCreateTest(sdk);
Expand All @@ -64,8 +64,10 @@
//////await FileTestHelper.RunSimpleFileTest(sdk); //will delete all of your files
//////await FineTuningTestHelper.CleanUpAllFineTunings(sdk); //!!!!! will delete all fine-tunings
//await FineTuningTestHelper.RunCaseStudyIsTheModelMakingUntrueStatements(sdk);
await TokenizerTestHelper.RunTokenizerTest();
await TokenizerTestHelper.RunTokenizerCountTest();
await TokenizerTestHelper.RunTokenizerTestCrClean();
//await TokenizerTestHelper.RunTokenizerTest();
//await TokenizerTestHelper.RunTokenizerCountTest();
//await TokenizerTestHelper.RunTokenizerTestCrClean();

await TokenizerTestHelper.RunTiktokenTokenizerTest();

Console.ReadLine();
30 changes: 30 additions & 0 deletions OpenAI.Playground/TestHelpers/TokenizerTestHelper.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using OpenAI.GPT3.Tokenizer.GPT3;
using OpenAI.GPT3.Tokenizer.TikToken;

namespace OpenAI.Playground.TestHelpers;

Expand Down Expand Up @@ -88,4 +89,33 @@ public static async Task RunTokenizerTestCrClean()
throw;
}
}

/// <summary>
/// Playground demo for the TikToken tokenizer: encodes a sample string, prints the
/// original text, the token count and the token ids, then decodes the ids and prints
/// the result.
/// </summary>
/// <remarks>
/// Any exception is written to the console and then rethrown to the caller.
/// </remarks>
public static async Task RunTiktokenTokenizerTest()
{
    try
    {
        // Other ways to obtain a tokenizer:
        //   new Tokenizer("cl100k_base");
        //   new Tokenizer().FromModel(Models.Model.TextDavinciV3);
        Tokenizer tikTokenizer = new Tokenizer().FromModelName("gpt-3.5-turbo-0301");

        string sampleText = @"床前明月光,疑是地上霜,举头望明月,低头思故乡。";
        int[] tokenIds = tikTokenizer.Encode(sampleText);

        Console.WriteLine(sampleText);
        Console.WriteLine(tokenIds.Length);

        // Print every id followed by a single space, then terminate the line.
        for (var index = 0; index < tokenIds.Length; index++)
        {
            Console.Write(tokenIds[index] + " ");
        }
        Console.WriteLine();

        // Decode the ids and print the text so it can be compared with the input by eye.
        string decodedText = tikTokenizer.Decode(tokenIds);
        Console.WriteLine(decodedText);
    }
    catch (Exception error)
    {
        Console.WriteLine(error);
        throw;
    }
}
}
6 changes: 6 additions & 0 deletions OpenAI.SDK/OpenAI.GPT3.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,18 @@

<!-- Removes the tokenizer data files from the None item list. -->
<!-- NOTE(review): the encoder.json and vocab.bpe paths here have no GPT3\ folder, unlike the EmbeddedResource entries in this file; confirm these Remove paths still match real files. -->
<ItemGroup>
<None Remove="Tokenizer\encoder.json" />
<None Remove="Tokenizer\TikToken\ranks\cl100k_base.tiktoken" />
<None Remove="Tokenizer\TikToken\ranks\p50k_base.tiktoken" />
<None Remove="Tokenizer\TikToken\ranks\r50k_base.tiktoken" />
<None Remove="Tokenizer\vocab.bpe" />
</ItemGroup>

<!-- Embeds the tokenizer data files as resources: the GPT3 encoder.json and vocab.bpe, plus the three TikToken rank files (cl100k_base, p50k_base, r50k_base). -->
<ItemGroup>
<EmbeddedResource Include="Tokenizer\GPT3\encoder.json" />
<EmbeddedResource Include="Tokenizer\GPT3\vocab.bpe" />
<EmbeddedResource Include="Tokenizer\TikToken\ranks\cl100k_base.tiktoken" />
<EmbeddedResource Include="Tokenizer\TikToken\ranks\p50k_base.tiktoken" />
<EmbeddedResource Include="Tokenizer\TikToken\ranks\r50k_base.tiktoken" />
</ItemGroup>
<ItemGroup>
<None Include="..\Readme.md">
Expand Down
Loading