Skip to content

Commit 329e14a

Browse files
authored
Wd/audio (#962)
* rework navigation * wip * fix buffering * finalizing audio feature * fix lint * update changelog
1 parent 8a59f43 commit 329e14a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+1484
-1344
lines changed

CHANGELOG.md

+21
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,27 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
88

99
Nothing unreleased!
1010

11+
## [1.1.0rc0] - 2024-05-06
12+
13+
### Added
14+
15+
- `cl.on_audio_chunk` decorator to process the user's incoming audio stream
16+
- `cl.on_audio_end` decorator to react to the end of the user audio stream
17+
- The `cl.Audio` element now has an `auto_play` property
18+
- `http_referer` is now available in `cl.user_session`
19+
20+
### Changed
21+
22+
- The UI has been revamped, especially the navigation
23+
- The arrow up button has been removed from the input bar, however pressing the arrow up key still opens the last inputs menu
24+
- **[breaking]** the `send()` method on `cl.Message` now returns the message instead of the message id
25+
- **[breaking]** The `multi_modal` feature has been renamed `spontaneous_file_upload` in the config
26+
- Element display property now defaults to `inline` instead of `side`
27+
28+
### Fixed
29+
30+
- Stopping a task should now work better (using asyncio task.cancel)
31+
1132
## [1.0.506] - 2024-04-30
1233

1334
### Added

backend/chainlit/__init__.py

+34-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
from chainlit.step import Step, step
5353
from chainlit.sync import make_async, run_sync
5454
from chainlit.telemetry import trace
55-
from chainlit.types import ChatProfile, ThreadDict
55+
from chainlit.types import AudioChunk, ChatProfile, ThreadDict
5656
from chainlit.user import PersistedUser, User
5757
from chainlit.user_session import user_session
5858
from chainlit.utils import make_module_getattr, wrap_user_function
@@ -224,6 +224,38 @@ def on_chat_end(func: Callable) -> Callable:
224224
return func
225225

226226

227+
@trace
228+
def on_audio_chunk(func: Callable) -> Callable:
229+
"""
230+
Hook to react to the audio chunks being sent.
231+
232+
Args:
233+
chunk (AudioChunk): The audio chunk being sent.
234+
235+
Returns:
236+
Callable[[AudioChunk], Any]: The decorated hook.
237+
"""
238+
239+
config.code.on_audio_chunk = wrap_user_function(func, with_task=False)
240+
return func
241+
242+
243+
@trace
244+
def on_audio_end(func: Callable) -> Callable:
245+
"""
246+
Hook to react to the audio stream ending. This is called after the last audio chunk is sent.
247+
248+
Args:
249+
elements (List[Element]): The files that were uploaded before starting the audio stream (if any).
250+
251+
Returns:
252+
Callable[[List[Element]], Any]: The decorated hook.
253+
"""
254+
255+
config.code.on_audio_end = wrap_user_function(func, with_task=True)
256+
return func
257+
258+
227259
@trace
228260
def author_rename(func: Callable[[str], str]) -> Callable[[str], str]:
229261
"""
@@ -318,6 +350,7 @@ def acall(self):
318350
__all__ = [
319351
"user_session",
320352
"CopilotFunction",
353+
"AudioChunk",
321354
"Action",
322355
"User",
323356
"PersistedUser",

backend/chainlit/config.py

+39-19
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616

1717
if TYPE_CHECKING:
1818
from chainlit.action import Action
19-
from chainlit.types import ChatProfile, ThreadDict
19+
from chainlit.element import ElementBased
20+
from chainlit.message import Message
21+
from chainlit.types import AudioChunk, ChatProfile, ThreadDict
2022
from chainlit.user import User
2123
from fastapi import Request, Response
2224

@@ -71,18 +73,26 @@
7173
# Automatically tag threads with the current chat profile (if a chat profile is used)
7274
auto_tag_thread = true
7375
74-
# Authorize users to upload files with messages
75-
[features.multi_modal]
76+
# Authorize users to spontaneously upload files with messages
77+
[features.spontaneous_file_upload]
7678
enabled = true
7779
accept = ["*/*"]
7880
max_files = 20
7981
max_size_mb = 500
8082
81-
# Allows user to use speech to text
82-
[features.speech_to_text]
83-
enabled = false
84-
# See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
85-
# language = "en-US"
83+
[features.audio]
84+
# Threshold for audio recording
85+
min_decibels = -45
86+
# Delay for the user to start speaking in MS
87+
initial_silence_timeout = 3000
88+
# Delay for the user to continue speaking in MS. If the user stops speaking for this duration, the recording will stop.
89+
silence_timeout = 1500
90+
# Above this duration (MS), the recording will forcefully stop.
91+
max_duration = 15000
92+
# Duration of the audio chunks in MS
93+
chunk_duration = 1000
94+
# Sample rate of the audio
95+
sample_rate = 44100
8696
8797
[UI]
8898
# Name of the app and chatbot.
@@ -189,26 +199,31 @@ class Theme(DataClassJsonMixin):
189199

190200

191201
@dataclass
192-
class SpeechToTextFeature:
193-
enabled: Optional[bool] = None
194-
language: Optional[str] = None
195-
196-
197-
@dataclass
198-
class MultiModalFeature:
202+
class SpontaneousFileUploadFeature(DataClassJsonMixin):
199203
enabled: Optional[bool] = None
200204
accept: Optional[Union[List[str], Dict[str, List[str]]]] = None
201205
max_files: Optional[int] = None
202206
max_size_mb: Optional[int] = None
203207

204208

209+
@dataclass
210+
class AudioFeature(DataClassJsonMixin):
211+
min_decibels: int = -45
212+
initial_silence_timeout: int = 2000
213+
silence_timeout: int = 1500
214+
chunk_duration: int = 1000
215+
max_duration: int = 15000
216+
sample_rate: int = 44100
217+
enabled: bool = False
218+
219+
205220
@dataclass()
206221
class FeaturesSettings(DataClassJsonMixin):
207222
prompt_playground: bool = True
208-
multi_modal: Optional[MultiModalFeature] = None
223+
spontaneous_file_upload: Optional[SpontaneousFileUploadFeature] = None
224+
audio: Optional[AudioFeature] = Field(default_factory=AudioFeature)
209225
latex: bool = False
210226
unsafe_allow_html: bool = False
211-
speech_to_text: Optional[SpeechToTextFeature] = None
212227
auto_tag_thread: bool = True
213228

214229

@@ -247,7 +262,10 @@ class CodeSettings:
247262
on_chat_start: Optional[Callable[[], Any]] = None
248263
on_chat_end: Optional[Callable[[], Any]] = None
249264
on_chat_resume: Optional[Callable[["ThreadDict"], Any]] = None
250-
on_message: Optional[Callable[[str], Any]] = None
265+
on_message: Optional[Callable[["Message"], Any]] = None
266+
on_audio_chunk: Optional[Callable[["AudioChunk"], Any]] = None
267+
on_audio_end: Optional[Callable[[List["ElementBased"]], Any]] = None
268+
251269
author_rename: Optional[Callable[[str], str]] = None
252270
on_settings_update: Optional[Callable[[Dict[str, Any]], Any]] = None
253271
set_chat_profiles: Optional[Callable[[Optional["User"]], List["ChatProfile"]]] = (
@@ -413,11 +431,13 @@ def load_settings():
413431

414432
ui_settings = UISettings(**ui_settings)
415433

434+
code_settings = CodeSettings(action_callbacks={})
435+
416436
return {
417437
"features": features_settings,
418438
"ui": ui_settings,
419439
"project": project_settings,
420-
"code": CodeSettings(action_callbacks={}),
440+
"code": code_settings,
421441
}
422442

423443

backend/chainlit/data/__init__.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ def attachment_to_element_dict(self, attachment: Attachment) -> "ElementDict":
156156
"chainlitKey": None,
157157
"display": metadata.get("display", "side"),
158158
"language": metadata.get("language"),
159+
"autoPlay": metadata.get("autoPlay", None),
159160
"page": metadata.get("page"),
160161
"size": metadata.get("size"),
161162
"type": metadata.get("type", "file"),
@@ -219,7 +220,7 @@ def step_to_step_dict(self, step: LiteralStep) -> "StepDict":
219220
"disableFeedback": metadata.get("disableFeedback", False),
220221
"indent": metadata.get("indent"),
221222
"language": metadata.get("language"),
222-
"isError": metadata.get("isError", False),
223+
"isError": bool(step.error),
223224
"waitForAnswer": metadata.get("waitForAnswer", False),
224225
}
225226

@@ -348,7 +349,6 @@ async def create_step(self, step_dict: "StepDict"):
348349
step_dict.get("metadata", {}),
349350
**{
350351
"disableFeedback": step_dict.get("disableFeedback"),
351-
"isError": step_dict.get("isError"),
352352
"waitForAnswer": step_dict.get("waitForAnswer"),
353353
"language": step_dict.get("language"),
354354
"showInput": step_dict.get("showInput"),
@@ -372,6 +372,8 @@ async def create_step(self, step_dict: "StepDict"):
372372
step["input"] = {"content": step_dict.get("input")}
373373
if step_dict.get("output"):
374374
step["output"] = {"content": step_dict.get("output")}
375+
if step_dict.get("isError"):
376+
step["error"] = step_dict.get("output")
375377

376378
await self.client.api.send_steps([step])
377379

backend/chainlit/data/sql_alchemy.py

+19-12
Original file line numberDiff line numberDiff line change
@@ -170,12 +170,14 @@ async def update_thread(
170170
raise ValueError("User not found in session context")
171171
data = {
172172
"id": thread_id,
173-
"createdAt": await self.get_current_timestamp()
174-
if metadata is None
175-
else None,
176-
"name": name
177-
if name is not None
178-
else (metadata.get("name") if metadata and "name" in metadata else None),
173+
"createdAt": (
174+
await self.get_current_timestamp() if metadata is None else None
175+
),
176+
"name": (
177+
name
178+
if name is not None
179+
else (metadata.get("name") if metadata and "name" in metadata else None)
180+
),
179181
"userId": user_id,
180182
"userIdentifier": user_identifier,
181183
"tags": tags,
@@ -552,13 +554,17 @@ async def get_all_user_threads(
552554
streaming=step_feedback.get("step_streaming", False),
553555
waitForAnswer=step_feedback.get("step_waitforanswer"),
554556
isError=step_feedback.get("step_iserror"),
555-
metadata=step_feedback["step_metadata"]
556-
if step_feedback.get("step_metadata") is not None
557-
else {},
557+
metadata=(
558+
step_feedback["step_metadata"]
559+
if step_feedback.get("step_metadata") is not None
560+
else {}
561+
),
558562
tags=step_feedback.get("step_tags"),
559-
input=step_feedback.get("step_input", "")
560-
if step_feedback["step_showinput"]
561-
else "",
563+
input=(
564+
step_feedback.get("step_input", "")
565+
if step_feedback["step_showinput"]
566+
else ""
567+
),
562568
output=step_feedback.get("step_output", ""),
563569
createdAt=step_feedback.get("step_createdat"),
564570
start=step_feedback.get("step_start"),
@@ -587,6 +593,7 @@ async def get_all_user_threads(
587593
display=element["element_display"],
588594
size=element.get("element_size"),
589595
language=element.get("element_language"),
596+
autoPlay=element.get("element_autoPlay"),
590597
page=element.get("element_page"),
591598
forId=element.get("element_forid"),
592599
mime=element.get("element_mime"),

backend/chainlit/element.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class ElementDict(TypedDict):
3838
size: Optional[ElementSize]
3939
language: Optional[str]
4040
page: Optional[int]
41+
autoPlay: Optional[bool]
4142
forId: Optional[str]
4243
mime: Optional[str]
4344

@@ -61,7 +62,7 @@ class Element:
6162
# The byte content of the element.
6263
content: Optional[Union[bytes, str]] = None
6364
# Controls how the element should be displayed in the UI. Choices are “side”, “inline” (default), or “page”.
64-
display: ElementDisplay = Field(default="side")
65+
display: ElementDisplay = Field(default="inline")
6566
# Controls element size
6667
size: Optional[ElementSize] = None
6768
# The ID of the message this element is associated with.
@@ -93,6 +94,7 @@ def to_dict(self) -> ElementDict:
9394
"objectKey": getattr(self, "object_key", None),
9495
"size": getattr(self, "size", None),
9596
"page": getattr(self, "page", None),
97+
"autoPlay": getattr(self, "auto_play", None),
9698
"language": getattr(self, "language", None),
9799
"forId": getattr(self, "for_id", None),
98100
"mime": getattr(self, "mime", None),
@@ -306,6 +308,7 @@ async def preprocess_content(self):
306308
@dataclass
307309
class Audio(Element):
308310
type: ClassVar[ElementType] = "audio"
311+
auto_play: bool = False
309312

310313

311314
@dataclass

backend/chainlit/llama_index/callbacks.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def on_event_start(
7070
) -> str:
7171
"""Run when an event starts and return id of event."""
7272
self._restore_context()
73-
73+
7474
step_type: StepType = "undefined"
7575
if event_type == CBEventType.RETRIEVE:
7676
step_type = "retrieval"
@@ -104,7 +104,6 @@ def on_event_end(
104104
"""Run when an event ends."""
105105
step = self.steps.get(event_id, None)
106106

107-
108107
if payload is None or step is None:
109108
return
110109

@@ -117,11 +116,13 @@ def on_event_end(
117116
source_nodes = getattr(response, "source_nodes", None)
118117
if source_nodes:
119118
source_refs = ", ".join(
120-
[f"Source {idx}" for idx, _ in enumerate(source_nodes)])
119+
[f"Source {idx}" for idx, _ in enumerate(source_nodes)]
120+
)
121121
step.elements = [
122122
Text(
123123
name=f"Source {idx}",
124124
content=source.text or "Empty node",
125+
display="side",
125126
)
126127
for idx, source in enumerate(source_nodes)
127128
]
@@ -137,6 +138,7 @@ def on_event_end(
137138
step.elements = [
138139
Text(
139140
name=f"Source {idx}",
141+
display="side",
140142
content=source.node.get_text() or "Empty node",
141143
)
142144
for idx, source in enumerate(sources)
@@ -173,7 +175,7 @@ def on_event_end(
173175
token_count = self.total_llm_token_count or None
174176
raw_response = response.raw if response else None
175177
model = raw_response.get("model", None) if raw_response else None
176-
178+
177179
if messages and isinstance(response, ChatResponse):
178180
msg: ChatMessage = response.message
179181
step.generation = ChatGeneration(
@@ -198,12 +200,11 @@ def on_event_end(
198200
else:
199201
step.output = payload
200202
self.context.loop.create_task(step.update())
201-
203+
202204
self.steps.pop(event_id, None)
203205

204206
def _noop(self, *args, **kwargs):
205207
pass
206208

207209
start_trace = _noop
208210
end_trace = _noop
209-

0 commit comments

Comments
 (0)