diff --git a/PDF_to_Podcast_RAG.ipynb b/PDF_to_Podcast_RAG.ipynb
new file mode 100644
index 0000000..2d3c980
--- /dev/null
+++ b/PDF_to_Podcast_RAG.ipynb
@@ -0,0 +1,809 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ju_mt8GN1lAM"
+ },
+ "source": [
+ "# An Implementation of Notebook LM's PDF to Podcast"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "cnatbafQ1lAN"
+ },
+ "source": [
+ "## Overview\n",
+ "\n",
+ "Inspired by [Notebook LM's](https://notebooklm.google/) podcast generation feature and a recent open source implementation of [Open Notebook LM](https://github.com/gabrielchua/open-notebooklm). In this cookbook we will implement a walkthrough of how you can build a PDF to podcast pipeline.\n",
+ "\n",
+ "## Purpose of the Excercise\n",
+ "\n",
+ "The purpose of this exercise is to guide users through the process of building an automated pipeline that transforms a PDF document into a podcast-ready script and audio output. Specifically, it integrates PDF parsing, question generation, Retrieval-Augmented Generation (RAG) for contextual answers, and text-to-speech (TTS) synthesis to create a complete podcast production workflow."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## 1. Install Dependencies / Import Necessary Libraries\n",
+ "\n"
+ ],
+ "metadata": {
+ "id": "TSqdS-6u3ond"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "cN0Tpr76ssM1"
+ },
+ "outputs": [],
+ "source": [
+ "!apt install -qU libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg\n",
+ "!pip install -qU ffmpeg-python\n",
+ "!pip install -qU PyAudio\n",
+ "!pip install -qU cartesia #to access TTS model\n",
+ "!pip install -qU langchain-upstage langchain langchain_community\n",
+ "!pip install -qU faiss-cpu"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "iWea6go4r72c"
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "from google.colab import userdata\n",
+ "\n",
+ "from pathlib import Path\n",
+ "from tempfile import NamedTemporaryFile\n",
+ "from typing import List, Literal, Tuple, Optional, Dict, Union, List, Any\n",
+ "\n",
+ "import json\n",
+ "from pydantic import BaseModel\n",
+ "\n",
+ "from cartesia import Cartesia\n",
+ "from pydantic import ValidationError\n",
+ "\n",
+ "from langchain_upstage import ChatUpstage, UpstageEmbeddings, UpstageDocumentParseLoader\n",
+ "from langchain_core.prompts import ChatPromptTemplate\n",
+ "\n",
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+ "from langchain.vectorstores import FAISS"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# @title set API key\n",
+ "from pprint import pprint\n",
+ "import os\n",
+ "\n",
+ "import warnings\n",
+ "\n",
+ "warnings.filterwarnings(\"ignore\")\n",
+ "\n",
+ "if \"google.colab\" in str(get_ipython()):\n",
+ " # Running in Google Colab. Please set the UPSTAGE_API_KEY in the Colab Secrets\n",
+ " from google.colab import userdata\n",
+ "\n",
+ " os.environ[\"UPSTAGE_API_KEY\"] = userdata.get(\"UPSTAGE_API_KEY\")\n",
+ " os.environ[\"CARTESIA_API_KEY\"] = userdata.get(\"CARTESIA_API_KEY\")\n",
+ "\n",
+ "else:\n",
+ " # Running locally. Please set the UPSTAGE_API_KEY in the .env file\n",
+ " from dotenv import load_dotenv\n",
+ "\n",
+ " load_dotenv()\n",
+ "\n",
+ "assert (\n",
+ " \"UPSTAGE_API_KEY\" in os.environ\n",
+ "), \"Please set the UPSTAGE_API_KEY environment variable\""
+ ],
+ "metadata": {
+ "id": "QprcKBaQ2xFr"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## 2. Generate QnA context for the podcast using RAG\n",
+ "\n",
+ "### [2-1] Generate 7 Questions to Ask from the PDF\n",
+ "\n",
+ "In this step, we use a Upstage Solar to generate insightful and engaging questions based on the content of the provided PDF. The goal is to create a comprehensive set of questions that cover various aspects of the document, making them suitable for a podcast interview format. The questions should be designed to provoke thought, encourage in-depth discussion, and highlight key points from the PDF content.\n",
+ "\n",
+ "\n",
+ "* Upstage DocParse\n",
+ "* Upstage Solar\n"
+ ],
+ "metadata": {
+ "id": "kgP3DiKd3SUj"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#Load in PDF of Choice\n",
+ "def get_PDF_text(file):\n",
+ " text = ''\n",
+ " loader = UpstageDocumentParseLoader(file, output_format='text')\n",
+ "\n",
+ " pages = loader.load()\n",
+ " for page in pages:\n",
+ " text += page.page_content\n",
+ "\n",
+ " return text\n",
+ "\n",
+ "text = get_PDF_text('pdfs/solar_paper.pdf')"
+ ],
+ "metadata": {
+ "id": "GPdIe4rl5dVk"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Generate Questions Using LLM\n",
+ "\n",
+ "QUESTION_PROMPT = \"\"\"\n",
+ "You are an AI assistant tasked with generating a list of engaging questions for a podcast interview.\n",
+ "Based on the given text, create 7 questions that would be relevant for a podcast discussion.\n",
+ "The questions should be thought-provoking, insightful, and aimed at extracting key information.\n",
+ "Ensure the questions are diverse and cover different aspects of the text content.\n",
+ "\n",
+ "Return the questions as a json array and have all the key as questions\n",
+ "\"\"\"\n",
+ "\n",
+ "def generate_questions(system_prompt: str, text: str):\n",
+ "\n",
+ " llm = ChatUpstage(extra_body={\"response_format\": {\"type\": \"json_object\"}})\n",
+ " chat_prompt = ChatPromptTemplate.from_messages([\n",
+ " (\"system\", system_prompt),\n",
+ " (\"human\", \"{text}\")\n",
+ " ])\n",
+ "\n",
+ " chain = chat_prompt | llm\n",
+ "\n",
+ " response = chain.invoke({\"text\": text})\n",
+ " print(response.content)\n",
+ "\n",
+ " try:\n",
+ " response_dict = json.loads(response.content)\n",
+ " questions = response_dict.get(\"questions\", [])\n",
+ " if not isinstance(questions, list) or len(questions) == 0:\n",
+ " raise ValueError(\"Invalid response format or no questions generated\")\n",
+ " except (json.JSONDecodeError, ValueError) as e:\n",
+ " print(f\"Error generating questions: {e}\")\n",
+ " return []\n",
+ " return questions\n",
+ "\n",
+ "questions=generate_questions(QUESTION_PROMPT, text)"
+ ],
+ "metadata": {
+ "id": "XfEsTp2yTyPi"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "questions"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "-roMghdNqjq6",
+ "outputId": "7e98952c-7667-4983-c3a6-5b74cf1d1087"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "['What is the main contribution of the SOLAR 10.7B model?',\n",
+ " 'How does the depth up-scaling (DUS) method differ from other up-scaling methods like mixture-of-experts (MoE)?',\n",
+ " 'What are the advantages of using DUS over other up-scaling methods?',\n",
+ " 'What are the key components of the SOLAR 10.7B model?',\n",
+ " 'How does the SOLAR 10.7B model outperform existing models in various NLP tasks?',\n",
+ " 'What are the different stages of fine-tuning for the SOLAR 10.7B-Instruct model?',\n",
+ " \"What is the role of the alignment tuning stage in enhancing the SOLAR 10.7B-Instruct model's performance?\",\n",
+ " 'How does the SOLAR 10.7B-Instruct model compare to other top-performing models in terms of performance metrics?',\n",
+ " 'What are the limitations and considerations of the depth up-scaling (DUS) method?',\n",
+ " 'How does the SOLAR 10.7B-Instruct model address ethical concerns in its operation?']"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 20
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### [2-2] Retrieve and Generate Answers for Each Question Using RAG\n",
+ "\n",
+ "Once the questions are generated, we use a Retrieval-Augmented Generation (RAG) approach to obtain contextually relevant answers. This involves retrieving the most relevant sections from the PDF content, which has been embedded into a vector store, and then using the language model to generate detailed and informative answers. This ensures that the responses are backed by the original document, making them accurate and well-supported for podcast narration.\n",
+ "\n",
+ "\n",
+ "* Upstage Embedding Model\n",
+ "* Faiss"
+ ],
+ "metadata": {
+ "id": "ICK762aO44k6"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Embed PDF Content and Create Vector Store\n",
+ "def vectorstore_embed(file_path: str) -> List[float]:\n",
+ " \"\"\"Embed the given text using the LLM.\"\"\"\n",
+ " loader = UpstageDocumentParseLoader('solar.pdf', output_format='text')\n",
+ " documents = loader.load()\n",
+ "\n",
+ "\n",
+ " text_splitter = RecursiveCharacterTextSplitter(\n",
+ " chunk_size=1000, chunk_overlap=200, length_function=len\n",
+ " )\n",
+ "\n",
+ " texts = text_splitter.split_documents(documents)\n",
+ "\n",
+ " for doc in texts:\n",
+ " doc.page_content = doc.page_content.replace('\\t', ' ')\n",
+ "\n",
+ " embeddings = UpstageEmbeddings(model=\"solar-embedding-1-large\")\n",
+ " vectorstore = FAISS.from_documents(texts, embeddings)\n",
+ "\n",
+ " return vectorstore\n",
+ "\n",
+ "vectorstore=vectorstore_embed('pdfs/solar_paper.pdf')"
+ ],
+ "metadata": {
+ "id": "Yg-yS0_tt-Wz"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
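+ {
+ "cell_type": "markdown",
+ "source": [
+ "(Optional) Re-embedding the PDF on every run is slow. As a minimal sketch, assuming you keep the same embedding model, you can persist the FAISS index with LangChain's `save_local`/`load_local` helpers; the `faiss_index` folder name is just an example."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Optional sketch: persist the FAISS index so later runs can skip parsing and embedding.\n",
+ "# The folder name \"faiss_index\" is illustrative, not part of the original pipeline.\n",
+ "vectorstore.save_local(\"faiss_index\")\n",
+ "\n",
+ "# Reload it later with the same embedding model. allow_dangerous_deserialization is\n",
+ "# required because the index metadata is stored with pickle.\n",
+ "embeddings = UpstageEmbeddings(model=\"solar-embedding-1-large\")\n",
+ "vectorstore = FAISS.load_local(\"faiss_index\", embeddings, allow_dangerous_deserialization=True)"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },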
+ {
+ "cell_type": "code",
+ "source": [
+ "# Retrieve Contexts for Questions\n",
+ "def retrieve_contents(vectorstore: str, question: str):\n",
+ "\n",
+ " retriever_store = vectorstore.as_retriever(search_kwargs={\"k\": 1})\n",
+ "\n",
+ " docs = retriever_store.get_relevant_documents(question)\n",
+ "\n",
+ " return docs"
+ ],
+ "metadata": {
+ "id": "tBsRBVIctx8B"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
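+ {
+ "cell_type": "markdown",
+ "source": [
+ "As a quick, purely illustrative sanity check, you can inspect the top chunk retrieved for the first generated question before wiring retrieval into answer generation:"
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Illustrative sanity check: look at the chunk retrieved for the first question.\n",
+ "sample_docs = retrieve_contents(vectorstore, questions[0])\n",
+ "print(sample_docs[0].page_content[:300])"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },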
+ {
+ "cell_type": "code",
+ "source": [
+ "# Generate Answers Using LLM\n",
+ "def generate_answer(question: str) -> str:\n",
+ " \"\"\"Generate an answer to a given question using the provided context.\"\"\"\n",
+ "\n",
+ " context=retrieve_contents(vectorstore,question)\n",
+ "\n",
+ " prompt = f\"You are a Guest of the podcast interview and you will be answering as a professional. You just have to answer the following question based on the provided document: {question}. I want you to answer as if you are podcast interview\"\n",
+ "\n",
+ " llm = ChatUpstage()\n",
+ " chat_prompt = ChatPromptTemplate.from_messages([\n",
+ " (\"system\", prompt),\n",
+ " (\"human\",\"{context}\")\n",
+ " ])\n",
+ "\n",
+ " chain = chat_prompt | llm\n",
+ "\n",
+ " response = chain.invoke({\"context\": context})\n",
+ " print(response.content)\n",
+ "\n",
+ " return response.content\n"
+ ],
+ "metadata": {
+ "id": "68hhIlk6tbJP"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
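+ {
+ "cell_type": "markdown",
+ "source": [
+ "Before generating answers for every question, you can optionally spot-check a single one; this simply calls `generate_answer` on the first generated question:"
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Optional spot check: answer just the first question before running the full loop.\n",
+ "sample_answer = generate_answer(questions[0])"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },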
+ {
+ "cell_type": "code",
+ "source": [
+ "#Create QA Script\n",
+ "def create_qa_script(questions, pdf_text):\n",
+ " qa_script = []\n",
+ " for question in questions:\n",
+ " answer = generate_answer(question)\n",
+ " qa_script.append({\"speaker\": \"Host (Jane)\", \"text\": question})\n",
+ " qa_script.append({\"speaker\": \"Guest\", \"text\": answer})\n",
+ " return qa_script\n",
+ "\n",
+ "qa_script = create_qa_script(questions, text)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "ExwVH6nBfaAP",
+ "outputId": "6f8b609b-8357-4d59-aa9d-9974208975cf"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "metadata": {
+ "tags": null
+ },
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The main goal of the study presented in this paper is to investigate the advantages and limitations of dental, pharmacy, and public health education.\n",
+ "The proposed DUS method differs from other LLM up-scaling methods by focusing on depth up-scaling, which involves scaling the number of layers in the base model and continually pretraining the scaled model. Unlike some other methods that use Mixture of Experts (MoE) to scale the model, DUS uses a depthwise scaling method similar to Tan and Le (2019) adapted for the LLM architecture. This approach makes DUS more straightforward to use and immediately compatible with easy-to-use LLM frameworks like Hugging Face (Wolf et al., 2019) without requiring any changes to the existing framework.\n",
+ "The key components of the DUS method are not explicitly mentioned in the provided document. However, based on the context, it seems that the DUS method refers to the \"Depth Up-Scaling\" approach mentioned in the text. The limitations and considerations discussed in the document suggest that the DUS approach may involve the removal of layers from a base model and the need for more thorough explorations of hyperparameters. The document also mentions the model's computational demands and potential limitations for those with restricted computational resources. To answer the question about the key components of the DUS method, we would need more information or context about the specific method being referred to.\n",
+ "The DUS method addresses the limitations of existing up-scaling methods by introducing a new approach called Depth Up-Scaling (DUS). DUS is a method that focuses on increasing the depth of pre-trained models while maintaining their original width. This is achieved by removing a portion of the layers from the model and then pre-training the remaining layers with a larger batch size and for a longer period.\n",
+ "\n",
+ "One key limitation of the DUS approach is the need for more thorough exploration of hyperparameters, such as the number of layers removed from the model. The authors removed 8 layers from both ends of their base model due to hardware limitations, but they acknowledge that this value may not be optimal for enhancing performance. They plan to address this in future work through various comparative analyses.\n",
+ "\n",
+ "Another limitation is the extended time and cost of continued pre-training, which made it challenging to conduct more comprehensive experiments. This could limit the use of the model, especially for those with restricted computational resources.\n",
+ "\n",
+ "In terms of the model's broader implications, it's important to note that it has significant computational demands for both training and inference. Like all machine learning models, it is also vulnerable to various attacks and biases, which should be carefully considered when deploying the model in real-world applications.\n",
+ "The benefits of using DUS in scaling up LLMs include effective and efficient scaling, retaining simplicity for ease of use, compatibility with existing LLM frameworks, and no additional modules or dynamism as with MoE.\n",
+ "The DUS method compares to Mixture of Experts (MoE) in terms of complexity and efficiency in a way that it reduces the complexity while maintaining or even improving the model's performance. The vertical scaling approach of DUS eliminates the need for dynamism in the scaled model, which simplifies the implementation process compared to MoE models. This shift in approach offers a more straightforward way of working, moving away from the conventional challenges associated with MoE models, such as hyperparameter tuning and hardware efficiency tradeoffs.\n",
+ "The main contributions of the study are not explicitly mentioned in the provided document. However, based on the keywords and topics mentioned, the study likely discusses the advantages and limitations of dental, pharmacy, and public health education. It also mentions various research papers and authors in the field of artificial intelligence and machine learning, such as Noam Shazeer, Azalia Mirhoseini, and Tianxiao Shen. The document also mentions a specific paper titled \"Mixture models for diverse machine translation: Tricks of the trade\" by Tianxiao Shen, Myle Ott, Michael Auli, and Marc'Aurelio Ranzato.\n",
+ "SOLAR 10.7B is a deep learning model that has been introduced as part of this study. It is a scaled and continually pretrained model, and it is available under the Apache 2.0 license, which allows for commercial use. The model has been designed to bridge the gap between academic research and practical applications, making it accessible and useful in various fields.\n",
+ "\n",
+ "In terms of performance, SOLAR 10.7B excels across diverse benchmarks, indicating that it performs well in a variety of tasks and applications. However, the document does not provide direct comparisons with other models in terms of performance. Therefore, it is not possible to determine how SOLAR 10.7B compares to other models based on the information provided.\n",
+ "The significance of SOLAR 10.7B being available under the Apache 2.0 license is that it allows for commercial use and integration into a wide range of products and services. This bridges the gap between academic research and practical applications, making the advanced model more accessible and useful across various fields.\n",
+ "The fine-tuning of SOLAR 10.7B-Instruct enhances its capabilities by improving its performance on various tasks, such as language modeling, question answering, and code generation. This is achieved through a process called \"instruction tuning,\" which involves training the model on a large dataset of human instructions and their corresponding outputs.\n",
+ "\n",
+ "The paper reports that SOLAR 10.7B-Instruct outperforms other models, even larger ones, in some tasks. For example, it scores higher than Mixtral 8x7B-Instruct-v0.1 and Qwen 72B in terms of H6, a metric used to evaluate the model's performance on a variety of tasks.\n",
+ "\n",
+ "The authors also present ablation studies to analyze the effectiveness of different training datasets and stages in the fine-tuning process. These studies help to understand which components contribute most to the model's performance.\n",
+ "\n",
+ "Overall, the fine-tuning of SOLAR 10.7B-Instruct improves its ability to understand and follow human instructions, making it more useful for a wide range of applications.\n",
+ "The main challenges in implementing Mixture of Experts (MoE) models include dynamic routing and load-imbalanced computation. The intricacies associated with these aspects pose a considerable challenge in efficient implementation of MoE models. Additionally, existing hardware and software for deep learning, such as TPUs and XLA compilers, often require static knowledge of tensor shapes, making MoE implementation difficult on TPU. While GPU implementation offers more flexibility, sparse computation compatibility remains a hurdle. Striking the right balance between fixing the size of each expert to facilitate efficient computation and maintaining model quality creates a tradeoff between information preservation and hardware efficiency.\n",
+ "The DUS method addresses the need for specialized tools and frameworks for Mixture of Experts (MoE) models by introducing a different approach to model scaling. Unlike MoE models, which scale horizontally and introduce complexities such as dynamic routing and hyperparameter tuning, DUS scales models vertically and does not introduce dynamism in the scaled model. This shift in approach reduces the complexity associated with MoE models, making it easier to implement and maintain. As a result, the DUS method offers a more straightforward and less complex way of working compared to specialized tools and frameworks like Tutel and Megablocks, which are specifically designed for MoE models.\n",
+ "The purpose of instruction tuning is to enhance the steerability of large language models (LLMs) by fine-tuning them using data formatted as (instruction, input, output) for various tasks. This allows for targeted adjustments, providing a more controlled and task-oriented improvement to the model's capabilities. Instruction tuning differs from previous methods, which faced challenges in effectively guiding and controlling the behavior of large language models. The need for a more targeted approach arose from the limitations of existing methods, leading to the development of instruction tuning. This targeted approach enables better control over the model's behavior, making it more suitable for specific tasks and improving its overall performance in those tasks.\n",
+ "Alignment tuning helps LLMs generate more human-like responses by aligning their output with human intentions and preferences. This is achieved through techniques such as Reinforcement Learning with Human Feedback (RLHF), which involves learning a reward model based on human preferences and using reinforcement learning to guide the LLM towards prioritizing answers with the highest reward. This approach enables better control over the model's behavior, making it more suitable for specific tasks and improving its overall performance in alignment with user-defined objectives.\n",
+ "The three types of data contamination are guideline, raw text, and annotation. Guideline contamination occurs when a model accesses detailed annotation guidelines for a dataset, providing advantages in specific tasks. Raw text contamination occurs when a model is trained on the raw text of a dataset, giving it an advantage in tasks related to that dataset. Annotation contamination occurs when a model is trained on the annotations of a dataset, giving it an advantage in tasks related to that dataset. These types of contamination can impact the performance of a model, especially in zero and few-shot evaluations, and should be considered when evaluating the performance of a model.\n",
+ "The results of the data contamination test for SOLAR 10.7B-Instruct are as follows:\n",
+ "\n",
+ "- HellaSwag: N/A\n",
+ "- Winogrande: N/A\n",
+ "- MMLU: 0.06\n",
+ "- TruthfulQA: 0.15\n",
+ "- GSM8K: 0.28\n",
+ "- OpenOrca.ARC: 0.70\n",
+ "\n",
+ "These results indicate that there is no significant data contamination in the SOLAR 10.7B-Instruct model, as all values are well below the contamination threshold of 0.9.\n",
+ "The limitations and considerations of the DUS approach include the need for more thorough exploration of hyperparameters used in the DUS approach, such as the number of layers removed from the base model, and the significant computational demands for training and inference, which might limit its use, especially for those with restricted computational resources.\n",
+ "The broader implications of SOLAR 10.7B are significant, as it represents a major advancement in the field of large language models. This model has the potential to revolutionize various industries and applications, such as natural language processing, artificial intelligence, and machine learning. However, it is essential to understand the limitations of SOLAR 10.7B to guide future research and development in the field.\n",
+ "\n",
+ "To ensure the ethical use of SOLAR 10.7B, several steps have been taken. First, the researchers have demonstrated low levels of data contamination in their evaluations, which highlights their rigorous data handling and processing protocols. This underscores the reliability and integrity of the results obtained from SOLAR.\n",
+ "\n",
+ "Second, the researchers have ensured that all setups and methodologies used in their experiments steer clear of any potential ethical pitfalls. This proactive consideration and avoidance of ethically questionable practices demonstrate their commitment to responsible research.\n",
+ "\n",
+ "Lastly, the researchers have ensured that SOLAR complies with ethical standards, which underpins the reliability and integrity of the model. By addressing these ethical considerations, the researchers aim to foster trust in the use of SOLAR 10.7B and promote responsible innovation in the field of large language models.\n",
+ "The main challenges faced by researchers and practitioners in implementing Mixture of Experts (MoE) models are the high computational cost, the need for careful hyperparameter tuning, and the tradeoff between information preservation and hardware efficiency. These challenges necessitate specialized tools and frameworks, such as Tutel and Megablocks, to manage and implement MoE models effectively.\n",
+ "\n",
+ "The DUS (Densely Connected Mixture of Experts) method addresses these challenges by introducing model scaling in the vertical dimension, which reduces complexity compared to MoE. Unlike MoE, DUS does not introduce dynamism in the scaled model, offering a more straightforward approach to working with MoE models. This shift in approach allows researchers and practitioners to work with MoE models more efficiently, potentially offsetting the advantages of traditional MoE models.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "qa_script"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "eBwnfKjx3HoT",
+ "outputId": "91ff5ed4-42f5-44d8-bee7-83f165a90864"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[{'speaker': 'Host (Jane)',\n",
+ " 'text': 'What is the main contribution of the SOLAR 10.7B model?'},\n",
+ " {'speaker': 'Guest',\n",
+ " 'text': 'The main contribution of the SOLAR 10.7B model is the introduction of a depth-wise scaled and continually pretrained model that is available under the Apache 2.0 license for commercial use. This model outperforms other benchmarks in various fields, bridging the gap between academic research and practical applications.'},\n",
+ " {'speaker': 'Host (Jane)',\n",
+ " 'text': 'How does the depth up-scaling (DUS) method differ from other up-scaling methods like mixture-of-experts (MoE)?'},\n",
+ " {'speaker': 'Guest',\n",
+ " 'text': 'Depth up-scaling (DUS) differs from other up-scaling methods like mixture-of-experts (MoE) in several ways. Firstly, DUS focuses on increasing the number of layers in the base model, while MoE introduces a Mixture-of-Experts architecture to scale the model. Secondly, DUS uses a depthwise scaling method similar to Tan and Le (2019), which is adapted for the LLM architecture, whereas MoE employs a different approach. Lastly, DUS does not introduce any additional modules or dynamism, making it compatible with existing LLM frameworks like Hugging Face (Wolf et al., 2019) without requiring any changes.'},\n",
+ " {'speaker': 'Host (Jane)',\n",
+ " 'text': 'What are the advantages of using DUS over other up-scaling methods?'},\n",
+ " {'speaker': 'Guest',\n",
+ " 'text': 'The advantages of using DUS over other up-scaling methods are that it does not require additional modules like gating networks or dynamic expert selection, making it seamless to integrate into existing training and inference frameworks while maintaining high efficiency. Additionally, DUS does not necessitate a distinct training framework for optimal training efficiency or specialized CUDA kernels for fast inference.'},\n",
+ " {'speaker': 'Host (Jane)',\n",
+ " 'text': 'What are the key components of the SOLAR 10.7B model?'},\n",
+ " {'speaker': 'Guest',\n",
+ " 'text': 'The key components of the SOLAR 10.7B model are:\\n\\n1. Introduction of the SOLAR 10.7 Billion Parameter Model: The study has released the SOLAR 10.7B model, which is depth-wise scaled and continually pre-trained. This model is available under the Apache 2.0 license, allowing commercial use and integration into various products and services, bridging the gap between academic research and practical applications.\\n\\n2. Superior Performance Across Diverse Benchmarks: The SOLAR 10.7B model demonstrates exceptional performance across a wide range of benchmarks.'},\n",
+ " {'speaker': 'Host (Jane)',\n",
+ " 'text': 'How does the SOLAR 10.7B model outperform existing models in various NLP tasks?'},\n",
+ " {'speaker': 'Guest',\n",
+ " 'text': \"The SOLAR 10.7B model outperforms existing models in various NLP tasks due to its depth-wise scaling and continuous pretraining. The model's availability under the Apache 2.0 license allows for commercial use, bridging the gap between academic research and practical applications. SOLAR 10.7B has been shown to perform better across diverse benchmarks, demonstrating its superiority in various natural language processing tasks.\"},\n",
+ " {'speaker': 'Host (Jane)',\n",
+ " 'text': 'What are the different stages of fine-tuning for the SOLAR 10.7B-Instruct model?'},\n",
+ " {'speaker': 'Guest',\n",
+ " 'text': \"The different stages of fine-tuning for the SOLAR 10.7B-Instruct model are:\\n\\n1. Instruction tuning\\n2. Alignment tuning\\n\\nIn the instruction tuning stage, the model is trained to follow instructions in a QA format. This involves using open-source datasets, as well as synthesizing a math QA dataset to enhance the model's mathematical capabilities. The math QA dataset, called 'Synth. Math-Instruct', is created by rephrasing questions and answers from the Math dataset to avoid contamination with commonly used benchmark datasets.\\n\\nIn the alignment tuning stage, the instruction-tuned model is further fine-tuned to be more aligned with human or strong AI.\"},\n",
+ " {'speaker': 'Host (Jane)',\n",
+ " 'text': \"What is the role of the alignment tuning stage in enhancing the SOLAR 10.7B-Instruct model's performance?\"},\n",
+ " {'speaker': 'Guest',\n",
+ " 'text': \"The role of the alignment tuning stage in enhancing the SOLAR 10.7B-Instruct model's performance is to further fine-tune the model to be more aligned with human or strong AI preferences. This stage follows the instruction tuning stage, where the model is trained to follow instructions in a QA format using open-source datasets and a synthesized math QA dataset called 'Synth. Math-Instruct'. The alignment tuning stage aims to improve the model's performance by ensuring that it generates responses that are more in line with human expectations and preferences.\"},\n",
+ " {'speaker': 'Host (Jane)',\n",
+ " 'text': 'How does the SOLAR 10.7B-Instruct model compare to other top-performing models in terms of performance metrics?'},\n",
+ " {'speaker': 'Guest',\n",
+ " 'text': 'Based on the provided document, it is not possible to directly compare the SOLAR 10.7B-Instruct model with other top-performing models in terms of performance metrics. The document only mentions the evaluation results for SOLAR 10.7B and SOLAR 10.7B-Instruct, along with other top-performing models, in the Open LLM Leaderboard for six tasks. However, the document does not provide specific performance metrics for the SOLAR 10.7B-Instruct model compared to other top-performing models.\\n\\nTo answer the question, we would need more information about the performance metrics of the SOLAR 10.7B-Instruct model and how it compares to other top-performing models.'},\n",
+ " {'speaker': 'Host (Jane)',\n",
+ " 'text': 'What are the limitations and considerations of the depth up-scaling (DUS) method?'},\n",
+ " {'speaker': 'Guest',\n",
+ " 'text': \"The limitations and considerations of the Depth Up-Scaling (DUS) method include the need for more thorough explorations of hyperparameters used in the DUS approach, such as the removal of m = 8 layers from both ends of the base model due to hardware limitations, which may not be optimal for enhancing performance. The extended time and cost of continued pretraining made it challenging to conduct more comprehensive experiments, which the authors aim to address in future work through various comparative analyses. The model's significant computational demands for training and inference might limit its use, especially for those with restricted computational resources. Like all machine learning models, it is vulnerable to potential biases and errors in the training data, which could affect the model's performance and accuracy.\"},\n",
+ " {'speaker': 'Host (Jane)',\n",
+ " 'text': 'How does the SOLAR 10.7B-Instruct model address ethical concerns in its operation?'},\n",
+ " {'speaker': 'Guest',\n",
+ " 'text': 'To address ethical concerns in its operation, the SOLAR 10.7B-Instruct model emphasizes maintaining high ethical standards. It demonstrates low levels of data contamination through rigorous data handling and processing protocols, which are crucial for the reliability and integrity of the results. The model also ensures that all setups and methodologies employed in experiments steer clear of potential ethical pitfalls, and it avoids ethically questionable practices. SOLAR 10.7B-Instruct is committed to conducting innovative and responsible research.'}]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 25
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## 3. Generating the Complete Podcast Script with QnA script above\n",
+ "\n",
+ "This section involves generating an entire podcast script from the given Q&A content. The function should transform structured data into a conversational format suitable for a podcast setting, ensuring an engaging and natural dialogue flow between the host and the guest.\n",
+ "\n"
+ ],
+ "metadata": {
+ "id": "OeqNgEUb3Luu"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "class DialogueItem(BaseModel):\n",
+ " \"\"\"A single dialogue item.\"\"\"\n",
+ "\n",
+ " speaker: Literal[\"Host (Jane)\", \"Guest\"]\n",
+ " text: str\n",
+ "\n",
+ "\n",
+ "class Dialogue(BaseModel):\n",
+ " \"\"\"The dialogue between the host and guest.\"\"\"\n",
+ "\n",
+ " name_of_guest: str\n",
+ " dialogue: List[DialogueItem]"
+ ],
+ "metadata": {
+ "id": "M-q3v1LP91zb"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
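+ {
+ "cell_type": "markdown",
+ "source": [
+ "These are standard Pydantic v2 models, and `model_validate_json` (used by `generate_script` below) is what turns the LLM's raw JSON into a typed `Dialogue`. A minimal round-trip with made-up values:"
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Minimal illustration (made-up values) of the validation used in generate_script below.\n",
+ "sample_json = '{\"name_of_guest\": \"Dr. Jane Doe\", \"dialogue\": [{\"speaker\": \"Guest\", \"text\": \"Hello!\"}]}'\n",
+ "print(Dialogue.model_validate_json(sample_json))"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },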
+ {
+ "cell_type": "code",
+ "source": [
+ "# Adapted and modified from https://github.com/gabrielchua/open-notebooklm\n",
+ "SYSTEM_PROMPT = \"\"\"\n",
+ "You are a world-class podcast producer tasked with transforming the provided input text {text} into an engaging and informative podcast script.\n",
+ "Ensure the response adheres to this format:\n",
+ "\n",
+ "{{\n",
+ "\"name_of_guest\": \"\",\n",
+ "\"dialogue\": [\n",
+ " {{\n",
+ " \"speaker\": \"Host (Jane)\",\n",
+ " \"text\": \"\"\n",
+ " }},\n",
+ " {{\n",
+ " \"speaker\": \"Guest\",\n",
+ " \"text\": \"\",\n",
+ " }},\n",
+ " ...\n",
+ " ]\n",
+ "}}\n",
+ "\n",
+ "# Steps to Follow:\n",
+ "\n",
+ "0. for \"name_of_guest\": \"\" should be a real person name\n",
+ "\n",
+ "1. **Craft the Dialogue:**\n",
+ " Develop a natural, conversational flow between the host (Jane) and the guest speaker (the author or an expert on the topic).\n",
+ "\n",
+ " Dialogue content:\n",
+ " the {text} will be the main context for the podcast which is a QnA content.\n",
+ " Need all the questions and answers from the {text} in the podcast script.\n",
+ "\n",
+ " Incorporate:\n",
+ " - Clear explanations of complex topics\n",
+ " - An engaging and lively tone to captivate listeners\n",
+ " - A balance of information and entertainment\n",
+ "\n",
+ " Rules for the dialogue:\n",
+ " - The host (Jane) always initiates the conversation and interviews the guest\n",
+ " - Include thoughtful questions from the host to guide the discussion\n",
+ " - Incorporate natural speech patterns, including occasional verbal fillers (e.g., \"Uhh\", \"Hmmm\", \"um,\" \"well,\" \"you know\")\n",
+ " - Allow for natural interruptions and back-and-forth between host and guest - this is very important to make the conversation feel authentic\n",
+ " - Ensure the guest's responses are substantiated by the input text, avoiding unsupported claims\n",
+ " - Maintain a PG-rated conversation appropriate for all audiences\n",
+ " - Avoid any marketing or self-promotional content from the guest\n",
+ " - The host concludes the conversation\n",
+ "\n",
+ "\n",
+ "2. **Maintain Authenticity:**\n",
+ " Throughout the script, strive for authenticity in the conversation. Include:\n",
+ " - Moments of genuine curiosity or surprise from the host\n",
+ " - Instances where the guest might briefly struggle to articulate a complex idea\n",
+ " - Light-hearted moments or humor when appropriate\n",
+ " - Brief personal anecdotes or examples that relate to the topic (within the bounds of the input text)\n",
+ "\n",
+ "3. **Consider Pacing and Structure:**\n",
+ " Ensure the dialogue has a natural ebb and flow:\n",
+ " - Start with a strong hook to grab the listener's attention\n",
+ " - Gradually build complexity as the conversation progresses\n",
+ " - Include brief \"breather\" moments for listeners to absorb complex information\n",
+ " - For complicated concepts, reasking similar questions framed from a different perspective is recommended\n",
+ " - End on a high note, perhaps with a thought-provoking question or a call-to-action for listeners\n",
+ "\n",
+ "IMPORTANT RULE: Each line of dialogue should be no more than 100 characters (e.g., can finish within 5-8 seconds)\n",
+ "\n",
+ "Remember: Always reply in valid JSON format, without code blocks. Begin directly with the JSON output.\n",
+ "\"\"\""
+ ],
+ "metadata": {
+ "id": "iWH3ByXc9Q2s"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def call_llm(system_prompt: str, text, dialogue_format):\n",
+ " \"\"\"Call the LLM with the given prompt and dialogue format.\"\"\"\n",
+ " llm = ChatUpstage(extra_body={\"response_format\": {\"type\": \"json_object\", \"schema\":dialogue_format.model_json_schema()}})\n",
+ "\n",
+ "\n",
+ " chat_prompt = ChatPromptTemplate.from_messages([\n",
+ " (\"system\", system_prompt),\n",
+ " (\"human\", \"{text}\")\n",
+ " ])\n",
+ "\n",
+ " # Create the chain\n",
+ " chain = chat_prompt | llm\n",
+ "\n",
+ " # Call the chain with the input text\n",
+ " response = chain.invoke({\"text\": text})\n",
+ " return response"
+ ],
+ "metadata": {
+ "id": "BB06DARM9jYc"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def generate_script(system_prompt: str, input_text, output_model):\n",
+ " \"\"\"Get the dialogue from the LLM.\"\"\"\n",
+ " # Load as python object\n",
+ " try:\n",
+ " response = call_llm(system_prompt, input_text, output_model)\n",
+ " dialogue = output_model.model_validate_json(response.content)\n",
+ " except ValidationError as e:\n",
+ " error_message = f\"Failed to parse dialogue JSON: {e}\"\n",
+ " system_prompt_with_error = f\"{system_prompt}\\n\\nPlease return a VALID JSON object. This was the earlier error: {error_message}\"\n",
+ " response = call_llm(system_prompt_with_error, input_text, output_model)\n",
+ " dialogue = output_model.model_validate_json(response.content)\n",
+ " return dialogue"
+ ],
+ "metadata": {
+ "id": "XnMDM-ko9lpj"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### Generate script"
+ ],
+ "metadata": {
+ "id": "iZwHqfmV27cy"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "script = generate_script(SYSTEM_PROMPT, qa_script, Dialogue)"
+ ],
+ "metadata": {
+ "id": "8eVdjHpT9rg6"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "script"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "iBUKhlGtwyFF",
+ "outputId": "f1657cfd-00f9-4afb-84f3-1c3caf2a3cce"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Dialogue(name_of_guest='Dr. Alice Chan', dialogue=[DialogueItem(speaker='Host (Jane)', text=\"Welcome to our podcast, Dr. Alice Chan. Today, we'll be discussing the SOLAR 10.7B model and its contributions to the field of natural language processing. Let's start with the main contribution of this model. Can you explain what it is, Dr. Chan?\"), DialogueItem(speaker='Guest', text='Of course, Jane. The main contribution of the SOLAR 10.7B model is the introduction of a depth-wise scaled and continually pretrained model that is available under the Apache 2.0 license for commercial use. This model outperforms other benchmarks in various fields, bridging the gap between academic research and practical applications.'), DialogueItem(speaker='Host (Jane)', text=\"Interesting! Now, let's talk about the depth up-scaling (DUS) method. How does it differ from other up-scaling methods like mixture-of-experts (MoE)?\"), DialogueItem(speaker='Guest', text='Depth up-scaling (DUS) differs from other up-scaling methods like mixture-of-experts (MoE) in several ways. Firstly, DUS focuses on increasing the number of layers in the base model, while MoE introduces a Mixture-of-Experts architecture to scale the model. Secondly, DUS uses a depthwise scaling method similar to Tan and Le (2019), which is adapted for the LLM architecture, whereas MoE employs a different approach. Lastly, DUS does not introduce any additional modules or dynamism, making it compatible with existing LLM frameworks like Hugging Face (Wolf et al., 2019) without requiring any changes.'), DialogueItem(speaker='Host (Jane)', text='That sounds like a very efficient method. What are the advantages of using DUS over other up-scaling methods?'), DialogueItem(speaker='Guest', text='The advantages of using DUS over other up-scaling methods are that it does not require additional modules like gating networks or dynamic expert selection, making it seamless to integrate into existing training and inference frameworks while maintaining high efficiency. Additionally, DUS does not necessitate a distinct training framework for optimal training efficiency or specialized CUDA kernels for fast inference.'), DialogueItem(speaker='Host (Jane)', text=\"Now, let's dive into the key components of the SOLAR 10.7B model. What are they?\"), DialogueItem(speaker='Guest', text='The key components of the SOLAR 10.7B model are:\\n\\n1. Introduction of the SOLAR 10.7 Billion Parameter Model: The study has released the SOLAR 10.7B model, which is depth-wise scaled and continually pre-trained. This model is available under the Apache 2.0 license, allowing commercial use and integration into various products and services, bridging the gap between academic research and practical applications.\\n\\n2. Superior Performance Across Diverse Benchmarks: The SOLAR 10.7B model demonstrates exceptional performance across a wide range of benchmarks.'), DialogueItem(speaker='Host (Jane)', text='How does the SOLAR 10.7B model outperform existing models in various NLP tasks?'), DialogueItem(speaker='Guest', text=\"The SOLAR 10.7B model outperforms existing models in various NLP tasks due to its depth-wise scaling and continuous pretraining. The model's availability under the Apache 2.0 license allows for commercial use, bridging the gap between academic research and practical applications. 
SOLAR 10.7B has been shown to perform better across diverse benchmarks, demonstrating its superiority in various natural language processing tasks.\"), DialogueItem(speaker='Host (Jane)', text=\"It's fascinating how this model has been fine-tuned to perform so well. Can you tell us about the different stages of fine-tuning for the SOLAR 10.7B-Instruct model?\"), DialogueItem(speaker='Guest', text=\"The different stages of fine-tuning for the SOLAR 10.7B-Instruct model are:\\n\\n1. Instruction tuning\\n2. Alignment tuning\\n\\nIn the instruction tuning stage, the model is trained to follow instructions in a QA format. This involves using open-source datasets, as well as synthesizing a math QA dataset to enhance the model's mathematical capabilities. The math QA dataset, called 'Synth. Math-Instruct', is created by rephrasing questions and answers from the Math dataset to avoid contamination with commonly used benchmark datasets.\\n\\nIn the alignment tuning stage, the instruction-tuned model is further fine-tuned to be more aligned with human or strong AI.\"), DialogueItem(speaker='Host (Jane)', text=\"What is the role of the alignment tuning stage in enhancing the SOLAR 10.7B-Instruct model's performance?\"), DialogueItem(speaker='Guest', text=\"The role of the alignment tuning stage in enhancing the SOLAR 10.7B-Instruct model's performance is to further fine-tune the model to be more aligned with human or strong AI preferences. This stage follows the instruction tuning stage, where the model is trained to follow instructions in a QA format using open-source datasets and a synthesized math QA dataset called 'Synth. Math-Instruct'. The alignment tuning stage aims to improve the model's performance by ensuring that it generates responses that are more in line with human expectations and preferences.\"), DialogueItem(speaker='Host (Jane)', text='How does the SOLAR 10.7B-Instruct model compare to other top-performing models in terms of performance metrics?'), DialogueItem(speaker='Guest', text='Based on the provided document, it is not possible to directly compare the SOLAR 10.7B-Instruct model with other top-performing models in terms of performance metrics. The document only mentions the evaluation results for SOLAR 10.7B and SOLAR 10.7B-Instruct, along with other top-performing models, in the Open LLM Leaderboard for six tasks. However, the document does not provide specific performance metrics for the SOLAR 10.7B-Instruct model compared to other top-performing models.\\n\\nTo answer the question, we would need more information about the performance metrics of the SOLAR 10.7B-Instruct model and how it compares to other top-performing models.'), DialogueItem(speaker='Host (Jane)', text=\"That's a shame. Are there any limitations or considerations to keep in mind when using the depth up-scaling (DUS) method?\"), DialogueItem(speaker='Guest', text=\"The limitations and considerations of the Depth Up-Scaling (DUS) method include the need for more thorough explorations of hyperparameters used in the DUS approach, such as the removal of m = 8 layers from both ends of the base model due to hardware limitations, which may not be optimal for enhancing performance. The extended time and cost of continued pretraining made it challenging to conduct more comprehensive experiments, which the authors aim to address in future work through various comparative analyses. 
The model's significant computational demands for training and inference might limit its use, especially for those with restricted computational resources. Like all machine learning models, it is vulnerable to potential biases and errors in the training data, which could affect the model's performance and accuracy.\"), DialogueItem(speaker='Host (Jane)', text='Lastly, how does the SOLAR 10.7B-Instruct model address ethical concerns in its operation?'), DialogueItem(speaker='Guest', text='To address ethical concerns in its operation, the SOLAR 10.7B-Instruct model emphasizes maintaining high ethical standards. It demonstrates low levels of data contamination through rigorous data handling and processing protocols, which are crucial for the reliability and integrity of the results. The model also ensures that all setups and methodologies employed in experiments steer clear of potential ethical pitfalls, and it avoids ethically questionable practices. SOLAR 10.7B-Instruct is committed to conducting innovative and responsible research.')])"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 99
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "PW4-FUmB1lAS"
+ },
+ "source": [
+ "## 4. Generate Podcast Using TTS\n",
+ "\n",
+ "Below we read through the script and parse choose the TTS voice depending on the speaker. We define a speaker and guest voice id.\n",
+ "\n",
+ "We can loop through the lines in the script and generate them by a call to the TTS model with specific voice and lines configurations. The lines all appended to the same buffer and once the script finishes we write this out to a `wav` file, ready to be played.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import subprocess\n",
+ "import ffmpeg\n",
+ "\n",
+ "host_id = \"694f9389-aac1-45b6-b726-9d9369183238\" # Jane - host\n",
+ "guest_id = \"a0e99841-438c-4a64-b679-ae501e7d6091\" # Guest\n",
+ "\n",
+ "model_id = \"sonic-english\" # The Sonic Cartesia model for English TTS\n",
+ "\n",
+ "output_format = {\n",
+ " \"container\": \"raw\",\n",
+ " \"encoding\": \"pcm_f32le\",\n",
+ " \"sample_rate\": 44100,\n",
+ "}\n",
+ "\n",
+ "client_cartesia = Cartesia(api_key=os.environ.get(\"CARTESIA_API_KEY\"))\n",
+ "\n",
+ "\n",
+ "# Set up a WebSocket connection.\n",
+ "ws = client_cartesia.tts.websocket()\n",
+ "\n",
+ "# Open a file to write the raw PCM audio bytes to.\n",
+ "f = open(\"podcast.pcm\", \"wb\")\n",
+ "\n",
+ "# Generate and stream audio.\n",
+ "for line in script.dialogue:\n",
+ " if line.speaker == \"Guest\":\n",
+ " voice_id = guest_id\n",
+ " else:\n",
+ " voice_id = host_id\n",
+ "\n",
+ " for output in ws.send(\n",
+ " model_id=model_id,\n",
+ " transcript='-' + line.text, # the \"-\"\" is to add a pause between speakers\n",
+ " voice_id=voice_id,\n",
+ " stream=True,\n",
+ " output_format=output_format,\n",
+ " ):\n",
+ " buffer = output[\"audio\"] # buffer contains raw PCM audio bytes\n",
+ " f.write(buffer)\n",
+ "\n",
+ "# Close the connection to release resources\n",
+ "ws.close()\n",
+ "f.close()\n",
+ "\n",
+ "# Convert the raw PCM bytes to a WAV file.\n",
+ "ffmpeg.input(\"podcast.pcm\", format=\"f32le\").output(\"podcast.wav\").run()\n",
+ "\n",
+ "# Play the file\n",
+ "subprocess.run([\"ffplay\", \"-autoexit\", \"-nodisp\", \"podcast.wav\"])"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "3xn6I3pn7oI8",
+ "outputId": "21c52a79-0a20-4cba-ac79-c404edff5379"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "CompletedProcess(args=['ffplay', '-autoexit', '-nodisp', 'podcast.wav'], returncode=0)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 104
+ }
+ ]
+ },
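+ {
+ "cell_type": "markdown",
+ "source": [
+ "(Optional) If you want a smaller file to share, you can transcode the WAV to MP3 with the same `ffmpeg-python` bindings; the output filename here is just an example."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Optional: transcode the WAV to MP3 for easier sharing (output name is illustrative).\n",
+ "ffmpeg.input(\"podcast.wav\").output(\"podcast.mp3\").run(overwrite_output=True)"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },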
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "STWaJf_ySctY"
+ },
+ "outputs": [],
+ "source": [
+ "# Play the podcast\n",
+ "import IPython\n",
+ "IPython.display.Audio(\"podcast.wav\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file