Skip to content

Commit ca79e73

Browse files
committed
feat: basic features for P
1 parent ccc7b33 commit ca79e73

File tree

9 files changed

+341
-21
lines changed

9 files changed

+341
-21
lines changed

.github/workflows/test.yml

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# CI: flake8 lint + pytest with coverage, uploaded to Codecov.
name: test

on:
  push:
    branches:
      - main
      - dev
    paths-ignore:
      - '**/*.md'
      - '**/*.ipynb'
      - 'examples/**'
  pull_request:
    branches:
      - main
      - dev
    paths-ignore:
      - '**/*.md'
      - '**/*.ipynb'
      - 'examples/**'

jobs:
  test:
    name: Tests on ${{ matrix.os }} for ${{ matrix.python-version }}
    strategy:
      matrix:
        python-version: [3.11]
        os: [ubuntu-latest]
    runs-on: ${{ matrix.os }}
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        # v3 runs on the deprecated node16 runtime; v5 is the current release.
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -r requirements-dev.txt
      - name: Lint with flake8
        run: |
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
      - name: Build and Test
        # FIX: coverage previously targeted `drive_flow` (a different project);
        # this repo's package is `prompt_string`.
        run: |
          python -m pytest -o log_cli=true -o log_cli_level="INFO" --cov=prompt_string --cov-report=xml -v ./
      - name: Check codecov file
        id: check_files
        uses: andstor/file-existence-action@v1
        with:
          files: './coverage.xml'
      - name: Upload coverage from test to Codecov
        uses: codecov/codecov-action@v5
        with:
          # `file` is deprecated in codecov-action v5; `files` is the supported input.
          files: ./coverage.xml
          token: ${{ secrets.CODECOV_TOKEN }}

prompt_string/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1+
from .string import PromptString as P

# Package metadata.
__author__ = "Gus Ye"
__version__ = "0.0.1"
__url__ = "https://github.com/memodb-io/prompt-string"

prompt_string/string.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
from typing import Optional, Literal
2+
from functools import wraps
3+
from . import token
4+
5+
6+
def to_prompt_string(func):
    """Decorator for str-returning methods of ``PromptString``.

    Re-wraps the plain-string result of *func* as a new ``PromptString``
    that inherits the caller's meta info (currently just the role).
    """

    @wraps(func)
    def inner(self: "PromptString", *args, **kwargs):
        plain_result = func(self, *args, **kwargs)
        return PromptString(plain_result, **self._meta_info)

    return inner
13+
14+
15+
class PromptString(str):
    """A ``str`` subclass specialized for prompts.

    Differences from a plain ``str``:

    - ``len(p)`` returns the number of tokens, not characters.
    - Indexing/slicing operates on tokens and returns a ``PromptString``.
    - Carries an optional chat ``role`` (``system``/``user``/``assistant``).
    - ``+`` concatenates into a new ``PromptString``; ``/`` builds a
      ``PromptChain``.
    """

    def __new__(
        cls,
        *args,
        role: Optional[Literal["system", "user", "assistant"]] = None,
        **kwargs,
    ):
        instance = str.__new__(cls, *args, **kwargs)
        # Tokenize once at construction; __len__/__getitem__ reuse this list.
        instance.__prompt_string_tokens = token.get_encoded_tokens(instance)
        instance.__prompt_string_role = role
        # Meta info propagated to any PromptString derived from this one
        # (see _meta_info and the to_prompt_string decorator).
        instance.__prompt_string_kwargs = {
            "role": role,
        }
        return instance

    @property
    def role(self):
        """The chat role of this prompt, or ``None`` if unset."""
        return self.__prompt_string_role

    @property
    def _meta_info(self):
        """Keyword args used to rebuild a PromptString with the same meta."""
        return self.__prompt_string_kwargs

    @role.setter
    def role(self, value):
        self.__prompt_string_role = value
        # BUGFIX: keep _meta_info in sync; previously only the role attribute
        # was updated, so prompts derived via +, slicing, replace or format
        # silently kept the stale role from construction time.
        self.__prompt_string_kwargs["role"] = value

    def __len__(self):
        # Token count, not character count.
        return len(self.__prompt_string_tokens)

    @to_prompt_string
    def __getitem__(self, index):
        """Token-level indexing; returns decoded text as a PromptString."""
        if isinstance(index, slice):
            return token.get_decoded_tokens(self.__prompt_string_tokens[index])
        elif isinstance(index, int):
            return token.get_decoded_tokens([self.__prompt_string_tokens[index]])
        else:
            raise ValueError(f"Invalid index type: {type(index)}")

    def message(self, style="openai"):
        """Pack this prompt as a single chat-message dict.

        Raises ValueError for unknown *style* values.
        """
        if style == "openai":
            return {
                "role": self.role,
                "content": super().__str__(),
            }
        else:
            raise ValueError(f"Invalid style: {style}")

    def __add__(self, other):
        """Concatenate; the right operand's meta (role) wins when it is a
        PromptString, otherwise this prompt's meta is kept."""
        if isinstance(other, PromptString):
            return PromptString(super().__add__(other), **other._meta_info)
        elif isinstance(other, str):
            return PromptString(super().__add__(other), **self._meta_info)
        else:
            raise ValueError(f"Invalid type for Prompt Concatenation: {type(other)}")

    def __truediv__(self, other):
        """``p1 / p2`` links two prompts into a PromptChain."""
        # Local import avoids a circular dependency with string_chain.
        from .string_chain import PromptChain

        assert isinstance(other, PromptString)
        return PromptChain([self, other])

    @to_prompt_string
    def replace(self, old, new, count=-1):
        """Like str.replace, but returns a PromptString with the same meta."""
        return super().replace(old, new, count)

    @to_prompt_string
    def format(self, *args, **kwargs):
        """Like str.format, but returns a PromptString with the same meta."""
        return super().format(*args, **kwargs)

prompt_string/string_chain.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
from .string import PromptString

TOTAL_ROLES = {"system", "user", "assistant"}
DEFAULT_ROLE_ORDER = ["user", "assistant"]


class PromptChain:
    """An ordered sequence of ``PromptString``s, built with the ``/`` operator.

    Behaves like a read-only ``list[PromptString]`` and can be packed into
    OpenAI-compatible chat messages with :meth:`messages`.
    """

    def __init__(self, prompts: list[PromptString], default_start_role: str = "user"):
        # Only PromptString elements are allowed in a chain.
        assert all(isinstance(p, PromptString) for p in prompts)
        self.__prompts = prompts
        self.__start_role = default_start_role

    def __len__(self):
        return len(self.__prompts)

    def __getitem__(self, index):
        """int -> the PromptString at that position; slice -> a sub-chain."""
        if isinstance(index, int):
            return self.__prompts[index]
        if isinstance(index, slice):
            return PromptChain(
                self.__prompts[index], default_start_role=self.__start_role
            )
        raise ValueError(f"Invalid index type: {type(index)}")

    @property
    def infer_roles(self):
        """Effective role per prompt: an explicit ``.role`` wins, otherwise
        roles alternate user/assistant (after an optional leading start role).
        """
        if not len(self.__prompts):
            return []
        inferred = []
        remaining = self.__prompts
        if self.__start_role in ["system", "assistant"]:
            # A system/assistant start role claims the first prompt slot.
            inferred.append(self.__start_role)
            remaining = remaining[1:]
        for offset, prompt in enumerate(remaining):
            fallback = DEFAULT_ROLE_ORDER[offset % len(DEFAULT_ROLE_ORDER)]
            inferred.append(prompt.role or fallback)
        return inferred

    @property
    def roles(self):
        """Raw roles as stored on each prompt (may contain None)."""
        return [p.role for p in self.__prompts]

    def __truediv__(self, other):
        """Extend this chain with another chain or a single prompt."""
        if isinstance(other, PromptChain):
            combined = self.__prompts + other.__prompts
        elif isinstance(other, PromptString):
            combined = self.__prompts + [other]
        else:
            raise ValueError(f"Invalid type for PromptChain Division: {type(other)}")
        return PromptChain(combined, default_start_role=self.__start_role)

    def messages(self, style="openai"):
        """Pack the chain into chat-message dicts, overriding each message's
        role with the inferred role. Raises ValueError for unknown styles."""
        if style != "openai":
            raise ValueError(f"Invalid style: {style}")
        packed = [p.message() for p in self.__prompts]
        for entry, effective_role in zip(packed, self.infer_roles):
            entry["role"] = effective_role
        return packed

    def __str__(self):
        return str(self.messages())

prompt_string/token.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from tiktoken import encoding_for_model

# Module-wide encoder shared by the helpers below. Initialized lazily so that
# importing this package stays cheap: tiktoken may download/load encoding data
# on first use, which previously happened eagerly at import time.
USE_ENCODER = None
# Model whose tokenizer is used when no encoder was set up explicitly.
DEFAULT_ENCODER_MODEL = "gpt-4o"


def _ensure_encoder():
    """Return the module encoder, initializing it on first use."""
    global USE_ENCODER
    if USE_ENCODER is None:
        setup_encoder()
    return USE_ENCODER


def get_encoded_tokens(content: str) -> list[int]:
    """Encode *content* into a list of token ids."""
    return _ensure_encoder().encode(content)


def get_decoded_tokens(tokens: list[int]) -> str:
    """Decode a list of token ids back into text."""
    return _ensure_encoder().decode(tokens)


def truncate_string(content: str, max_tokens: int) -> str:
    """Return *content* truncated to at most *max_tokens* tokens."""
    return get_decoded_tokens(get_encoded_tokens(content)[:max_tokens])


def setup_encoder(model: str = DEFAULT_ENCODER_MODEL):
    """Select the tokenizer for *model* as the module-wide encoder."""
    global USE_ENCODER
    USE_ENCODER = encoding_for_model(model)

prompt_string/types.py

Whitespace-only changes.

readme.md

Lines changed: 49 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,16 @@ Prompt is essentially a string, but it should behave somewhat differently from a
1616

1717
👨 **Role & Concatenation**: Prompt strings should have designated roles (e.g., `system`, `user`, `assistant`) and should be concatenated in a specific manner.
1818

19-
🦆 **Binding Functions**: A prompt string contains logic and instructions, so having some binding functions for AI-related stuff is beneficial and necessary (e.g., convert to OpenAI Message Format).
2019

2120

21+
## Features
2222

23-
**Few promises in `prompt-string`:**
23+
`prompt-string` provides two types:
2424

25-
- `prompt-string` inherits from `string`. Therefore, aside from the mentioned features, its other behaviors are just like those of a `string` in Python.
26-
- `prompt-string` won't add OpenAI and other AI SDKs as dependencies; it is simply a toolkit for prompts.
27-
- `prompt-string` will be super light and fast, with no heavy processes running behind the scenes.
25+
- `P` for prompt, inherits from `string`. Length, slicing and concatenation are modified, and it supports new attributes like `.role`.
26+
- `p = P("You're a helpful assistant")`
27+
- `PC` for prompt chain, acts like `list[P]`. Links a series of prompts and supports `.messages(...)`
28+
- `pc = p1 / p2 / p3`
2829

2930

3031

@@ -50,35 +51,65 @@ print("Decoded result of the second token", prompt[2])
5051
print("The decoded result of first five tokens", prompt[:5])
5152
```
5253

54+
`P` supports some `str` native methods to still return a `P` object:
55+
56+
- `.format`
57+
- `.replace`
5358

59+
```python
60+
prompt = P("you're a helpful assistant. {temp}")
61+
62+
print(len(prompt.format(temp="End of instructions")))
63+
print(len(prompt.replace("{temp}", "")))
64+
```
5465

55-
#### Role & Concatenation
66+
> 🧐 Raise an issue if you think other methods should be supported
67+
68+
69+
70+
#### Role
5671

5772
```python
5873
from prompt_string import P
5974

60-
sp = P("you're a helpful assistant.", "system")
61-
up = P("How are you?", "user")
75+
sp = P("you're a helpful assistant.", role="system")
76+
up = P("How are you?", role="user")
6277

63-
print(sp.role, up.role, (sp+up).role)
78+
print(sp.role, up.role, (sp+up).roles)
6479
print(sp + up)
80+
81+
print(sp.message())
6582
```
6683

67-
- role can be `None`, `str`, `list[str]`
84+
- role can be `None`, `str` for `P`
6885
- For single prompt, like `sp`, the role is `str`(*e.g.* `system`) or `None`
69-
- For concatenated prompts, like `sp+up`, the role is `list[str]`(*e.g.* `['system', 'user']`)
86+
- `sp+up` will concatenate two prompt string and generate a new `P`, whose role will be updated if the latter one has one.
87+
- For example, `sp+up`'s role is `user`, `sp+P('Hi')`'s role is `system`
7088

7189

90+
- `.message(...)` return a JSON object of this prompt.
7291

73-
#### Binding Functions
7492

75-
```python
76-
from prompt_string import P
7793

78-
sp = P("you're a helpful assistant.")
79-
up = P("How are you?")
94+
#### Concatenation
8095

81-
print((sp+up).messages())
96+
```python
97+
pc = sp / up
98+
print(pc.roles)
99+
print(pc.messages())
82100
```
83101

84-
- `messages` will return the OpenAI-Compatible messages format, where you can directly pass it to `client.chat.completions.create(messages=...)`
102+
For concatenated prompts, like `sp / up`, the type will be converted to `PC` (prompt chain), `PC` has below things:
103+
104+
- `.roles`, a list of roles. For example, `(sp / up).roles` is `['system', 'user']`
105+
- `.messages(...)` pack prompts into OpenAI-Compatible messages JSON, where you can directly pass it to `client.chat.completions.create(messages=...)`.
106+
- `messages` will assume the first role is `user`, then proceed in the order of user-assistant. When a prompt has a role, it will use that role. Check `pc.infer_roles` for the final roles in messages.
107+
108+
109+
110+
## Few promises in `prompt-string`
111+
112+
- `P` inherits from `string`. Therefore, aside from the mentioned features, its other behaviors are just like those of a `string` in Python.
113+
- `prompt-string` won't add OpenAI and other AI SDKs as dependencies; it is simply a toolkit for prompts.
114+
- `prompt-string` will be super light and fast, with no heavy processes running behind the scenes.
115+

requirements-dev.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
pytest
2+
pytest-cov

0 commit comments

Comments
 (0)