From 0e176679a00b5aa6cc641052e11853b485fdf10a Mon Sep 17 00:00:00 2001 From: Luis Epinosa-Anke Date: Fri, 11 Oct 2024 06:54:44 +0100 Subject: [PATCH] add requirements.txt and wip analysis notebook --- notebooks/analyze_sample_experiment.ipynb | 315 ++++++++++++++++++++++ requirements.txt | 8 + 2 files changed, 323 insertions(+) create mode 100644 notebooks/analyze_sample_experiment.ipynb create mode 100644 requirements.txt diff --git a/notebooks/analyze_sample_experiment.ipynb b/notebooks/analyze_sample_experiment.ipynb new file mode 100644 index 0000000..4160d0f --- /dev/null +++ b/notebooks/analyze_sample_experiment.ipynb @@ -0,0 +1,315 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "cache_folder = '../caches/sample_experiment'" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded artifacts:\n", + "['_ih', '_oh', '_dh', 'In', 'Out', 'get_ipython', 'exit', 'quit', 'open', '_', '_i', '_ii', '_iii', '_i1', 'pickle', 'pd', '_i2', 'cache_folder', '_i3', 'os', 'load_cached_artifacts', 'artifacts', '_i4', '_4', '_i5', 're', 'faiss', 'vector_db', 'queries', 'reranked_rets', 'chunks', 'rets', 'disabled__cached_reranked_rets', 'docs', 'augmented_generations', '_i6', '_i7']\n" + ] + } + ], + "source": [ + "import os\n", + "import pickle\n", + "import re\n", + "import faiss\n", + "\n", + "def load_artifacts_as_variables(cache_folder):\n", + " for item in os.listdir(cache_folder):\n", + " \n", + " item_path = os.path.join(cache_folder, item)\n", + " \n", + " if item.endswith('.pkl'):\n", + " # Handle pickle files\n", + " var_name = re.sub(r'^cached_', '', item)\n", + " var_name = re.sub(r'\\.pkl$', '', var_name)\n", + " var_name = re.sub(r'[^a-zA-Z0-9_]', '_', var_name)\n", + " \n", + " with open(item_path, 'rb') as file:\n", + " globals()[var_name] = pickle.load(file)\n", + " \n", + " elif item == 'cached_vector_db':\n", + " # Handle vector database\n", + " vector_db_path = item_path\n", + " index_faiss_path = os.path.join(vector_db_path, 'index.faiss')\n", + " index_pkl_path = os.path.join(vector_db_path, 'index.pkl')\n", + " \n", + " if os.path.exists(index_faiss_path) and os.path.exists(index_pkl_path):\n", + " # Load FAISS index\n", + " index = faiss.read_index(index_faiss_path)\n", + " \n", + " # Load additional data from pickle file\n", + " with open(index_pkl_path, 'rb') as f:\n", + " index_data = pickle.load(f)\n", + " \n", + " # Combine into a single object\n", + " vector_db = {\n", + " 'index': index,\n", + " 'data': index_data\n", + " }\n", + " \n", + " globals()['vector_db'] = vector_db\n", + "\n", + " print(\"Loaded artifacts:\")\n", + " print([var for var in globals() if not var.startswith('__') and var != 'load_artifacts_as_variables'])\n", + "\n", + "load_artifacts_as_variables(cache_folder)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('Who is the individual associated with the cryptocurrency industry facing a criminal trial on fraud and conspiracy charges, as reported by both The Verge and TechCrunch, and is accused by prosecutors of committing fraud for personal gain?',\n", + " ' {\\n \"found_the_'),\n", + " (\"Which individual is implicated in both inflating the value of a Manhattan apartment to a figure not yet achieved in New York City's real estate history, according to 'Fortune', and is also accused of adjusting this apartment's valuation to compensate for a loss in another asset's worth, as reported by 'The Age'?\",\n", + " ' {\\n \"found_the_'),\n", + " ('Who is the figure associated with generative AI technology whose departure from OpenAI was considered shocking according to Fortune, and is also the subject of a prevailing theory suggesting a lack of full truthfulness with the board as reported by TechCrunch?',\n", + " ' {\\n \"found_the_'),\n", + " ('Do the TechCrunch article on software companies and the Hacker News article on The Epoch Times both report an increase in revenue related to payment and subscription models, respectively?',\n", + " ' {\\n \"found_the_'),\n", + " (\"Which online betting platform provides a welcome bonus of up to $1000 in bonus bets for new customers' first losses, runs NBA betting promotions, and is anticipated to extend the same sign-up offer to new users in Vermont, as reported by both CBSSports.com and Sporting News?\",\n", + " ' {\\n \"found_the_'),\n", + " ('Who is the individual alleged to have built a thriving crypto exchange on falsehoods and is accused by the prosecution of committing fraud for personal gain, as reported by both Fortune and TechCrunch?',\n", + " ' {\\n \"found_the_'),\n", + " (\"Does the TechCrunch article on Twitch's subscription revenue split policy indicate a different monetization strategy compared to the TechCrunch article on Beeper's plans for Beeper Mini subscriptions?\",\n", + " ' {\\n \"found_the_'),\n", + " (\"Does 'The New York Times' article attribute the success of the Buffalo Bills' defense to the contributions of Jordan Poyer, while the 'Sporting News' article suggests that the Baltimore Ravens' defense needs to improve before their game against the Cincinnati Bengals?\",\n", + " ' {\\n \"found_the_'),\n", + " ('What is the name of the organization discussed in TechCrunch articles that, despite its financial instability, is recognized for creating ChatGPT, which is both a priority and a platform for ongoing innovations, and is planning to enhance its capabilities with the release of GPT-4 and associated APIs?',\n", + " ' {\\n \"found_the_'),\n", + " ('Which company, as reported by both TechCrunch and The Verge, has spent billions to maintain its default search engine status on various platforms and is also accused of harming news publishers’ revenue through its business practices?',\n", + " ' Google\\n\\n{\\n \"found')]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(augmented_generations.items())[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Document(metadata={'category': 'entertainment', 'author': None, 'published_at': '2023-11-27 08:45:59.000000000Z', 'title': \"200+ of the best deals from Amazon's Cyber Monday sale\", 'url': 'https://mashable.com/article/cyber-monday-deals-amazon-2023', 'source': 'Mashable', 'hf_ds_id': 'yixuantt/MultiHopRAG', 'hf_ds_subset': 'corpus', 'hf_ds_split': 'train', 'hf_ds_col': 'body', 'start_index': 0}, page_content=\"Table of Contents Table of Contents Echo, Fire TV, and Kindle deals Apple deals TV deals Laptop deals Headphone and earbud deals Tablet deals Gaming deals Speaker deals Vacuum deals Kitchen deals Smart home deals Fitness deals Beauty tech deals Drone deals Camera deals Lego deals Gift card deals\\n\\nUPDATE: Nov. 27, 2023, 5:00 a.m. EST This post has been updated with all of the latest Cyber Monday deals available at Amazon.\\n\\nAmazon is dragging out the year's biggest shopping holiday(s) into 11 days of deals.\\n\\nThe retail giant began its Black Friday sale in the early morning of Friday, Nov. 17 (a week ahead of schedule) and was on top of making the switch to Cyber Monday language in the wee hours of Saturday, Nov. 25. Official Cyber Monday mode, which is currently on through Monday, Nov. 27, includes both a ton of deals carried over from Black Friday plus some new ones.\")" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chunks[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Document(metadata={'category': 'entertainment', 'author': None, 'published_at': '2023-11-27 08:45:59.000000000Z', 'title': \"200+ of the best deals from Amazon's Cyber Monday sale\", 'url': 'https://mashable.com/article/cyber-monday-deals-amazon-2023', 'source': 'Mashable', 'hf_ds_id': 'yixuantt/MultiHopRAG', 'hf_ds_subset': 'corpus', 'hf_ds_split': 'train', 'hf_ds_col': 'body'}, page_content='Table of Contents Table of Contents Echo, Fire TV, and Kindle deals Apple deals TV deals Laptop deals Headphone and earbud deals Tablet deals Gaming deals Speaker deals Vacuum deals Kitchen deals Smart home deals Fitness deals Beauty tech deals Drone deals Camera deals Lego deals Gift card deals\\n\\nUPDATE: Nov. 27, 2023, 5:00 a.m. EST This post has been updated with all of the latest Cyber Monday deals available at Amazon.\\n\\nAmazon is dragging out the year\\'s biggest shopping holiday(s) into 11 days of deals.\\n\\nThe retail giant began its Black Friday sale in the early morning of Friday, Nov. 17 (a week ahead of schedule) and was on top of making the switch to Cyber Monday language in the wee hours of Saturday, Nov. 25. Official Cyber Monday mode, which is currently on through Monday, Nov. 27, includes both a ton of deals carried over from Black Friday plus some new ones.\\n\\nWe\\'re curating a running list of Amazon\\'s best Cyber Weekend deals, spotlighting some of our favorites and noting when good deals inevitably sell out. Read on for the full rundown, and check back often: We\\'re going to be updating this story incessantly as the sale continues, as well as our even bigger (if you can imagine) list of Cyber Monday deals across more retailers.\\n\\nNote: All newly added deals are marked with a ✨, while deals with a 🔥 have dropped to an all-time low price. Amazon\\'s invite-only deals for Prime members are marked with a 📨. Deals with a strikeout were either sold out or expired at the time of writing.\\n\\nEcho, Fire TV, and Kindle deals\\n\\nWhy we like it\\n\\nAn Echo Show is a subtle yet game-changing addition to any room — and for less than $40 with this rollover Black Friday to Cyber Monday deal, there\\'s little reason to not make your life easier. The smart screen responds to Alexa commands that are particularly handy when your hands are full, like asking for measurement conversions mid-cooking, checking the weather mid-rushing out the door, or turning off your smart lights with a sleeping kid in hand. Plus, it\\'s got a 5.5-inch screen and better sound than its predecessor, making it perfect for watching videos or video calling friends and family.\\n\\nMore Amazon device and service deals\\n\\nAmazon services\\n\\nAudible Premium Plus — $5.95/month $14.95/month for four months (save $9/month; new customers only; get an additional $20 Audible credit)\\n\\nEcho Buds\\n\\nEcho smart displays\\n\\nEcho smart speakers\\n\\nFire tablets\\n\\nFire TVs\\n\\nNote: All Fire TVs come with a free 6-month subscription to MGM+ (a $35.94 value).\\n\\nFire TV streaming devices\\n\\neero\\n\\nKindles\\n\\nMiscellaneous Amazon devices\\n\\nApple deals\\n\\nWhy we like it\\n\\nNow that the 64GB 9th generation iPad has been going in and out of stock (you might be able to find it on sale for $229.99), our new favorite iPad deal at Amazon is the 10th generation 64GB model for $349. Compared to the 9th gen, the 10th gen classic iPad has a slightly bigger screen that\\'s now also a Liquid Retina display (10.9 inches versus 10.2 inches), a faster A14 Bionic chip for smoother multitasking, and USB-C charging.\\n\\nMore Apple deals\\n\\nAirPods\\n\\nMacBook\\n\\nMac\\n\\niPad\\n\\nApple Watch\\n\\nTV deals\\n\\nWhy we like it\\n\\nThis 65-inch Fire TV from Amazon hit its lowest ever price this month, and we\\'re not mad about it. With a 66 percent five-star review rating, it\\'s got much to love: Including stunning 4K QLED resolution, adaptive brightness that adjusts to the lighting of your room, the ability to project famous art or personal pics on it when not streaming anything, and, of course, that quintessential Alexa voice control.\\n\\nMore TV deals\\n\\n43 to 55 inches\\n\\n65 inches\\n\\n75 to 85 inches\\n\\nLaptop deals\\n\\nWhy we like it\\n\\nMacBooks aside (which are all listed above in the Apple section), another stellar Cyber Monday laptop deal at Amazon is the lightweight Microsoft Surface Laptop Go 3 for $599.99. This 25% discount drops the 2023 version to the regular asking price of our favorite budget laptop, the older Surface Go 2. Compared to the Go 2, the Go 3\\'s Intel Core i5 processor is 12th gen versus the Go 2\\'s 11th gen, harnessing better speeds and solid power for most everyday work or school tasks. On the outside, the Go 3 is definitely giving MacBook Air — the main difference being that the Go 3\\'s screen is a touchscreen.\\n\\nMore laptop deals\\n\\nTraditional laptops\\n\\n2-in-1 laptops\\n\\nChromebooks\\n\\nGaming laptops\\n\\nHeadphone and earbud deals\\n\\nWhy we like it\\n\\nNarrowing down a headphones upgrade from so many on-sale options is less overwhelming when you\\'ve confirmed that you want to stick with a super premium, super reputable brand like Bose — but also that you want to stick to a budget. There\\'s only one pair of Bose over-ear headphones you can grab for just under $200, and that\\'s the QuietComfort 45s at a record-low price of $199. This classic pair secures top-of-the-line ANC, 20 hours of battery life, and all-day comfort for less than half of the AirPods Max\\'s sale price.\\n\\nMore headphone and earbud deals\\n\\nHeadphones\\n\\nEarbuds\\n\\nTablet deals\\n\\nGaming deals\\n\\nGaming headsets\\n\\nMashable Deals Want more hand-picked deals in your inbox? Sign up for Mashable\\'s daily Deals newsletter. Loading... Sign Me Up By signing up you agree to our Terms of Use and Privacy Policy Thanks for signing up!\\n\\nGaming mice\\n\\nGaming keyboards\\n\\nVR headsets\\n\\nMeta Quest 2 — $249 $299.99 (save $50.99) + free $50 Amazon credit with code META50 🔥\\n\\nSpeaker deals\\n\\nVacuum deals\\n\\nCordless vacuums\\n\\nRobot vacuums and vacuum/mop hybrids\\n\\nKitchen deals\\n\\nPizza ovens\\n\\nSparkling water makers\\n\\nToaster ovens\\n\\nSmart home deals\\n\\nNote: Echo devices are listed above under \"Amazon device and service deals.\"\\n\\nBlink\\n\\nChromecast\\n\\nGoogle Nest\\n\\nFitness deals\\n\\nSmartwatches and fitness trackers\\n\\nNote: Apple Watches are listed above under \"Apple deals.\"\\n\\nBeauty tech deals\\n\\nDrone deals\\n\\nCamera deals\\n\\nGoPro\\n\\nLego deals\\n\\nGift card deals')" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Who is the individual associated with the cryptocurrency industry facing a criminal trial on fraud and conspiracy charges, as reported by both The Verge and TechCrunch, and is accused by prosecutors of committing fraud for personal gain?'" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "queries[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('Who is the individual associated with the cryptocurrency industry facing a criminal trial on fraud and conspiracy charges, as reported by both The Verge and TechCrunch, and is accused by prosecutors of committing fraud for personal gain?',\n", + " ['The trial of Sam Bankman-Fried is likely to be more consequential than just whether the man himself is found guilty. Depending on what evidence is introduced during the trial, it could be rough for the entire crypto industry.\\n\\n“How much damage can this trial do to the already beaten-down reputation of the industry at this point?” asks Yesha Yadav, a law professor at Vanderbilt University. “This trial is going to be an excruciating moment for the industry because no one knows what kind of evidence might come out.”\\n\\nBankman-Fried, the founder of FTX and Alameda Research, is facing seven counts of criminal charges: two counts of wire fraud, and five counts of conspiracy charges. FTX was a failed cryptocurrency exchange founded in 2019. According to a now-deleted profile from FTX investors Sequoia Capital, FTX was founded because of Bankman-Fried’s frustration with other exchanges when he was running Alameda Research, his crypto trading firm. According to the SEC, FTX was a fraud “from the start,” diverting customers’ funds to Alameda.',\n", + " 'Sam Bankman-Fried, the founder of failed cryptocurrency exchange FTX, is on trial for seven counts of wire fraud and conspiracy. FTX was a fraud “from the start,” the Securities and Exchange Commission alleges — with a “multi-billion-dollar deficiency caused by his own misappropriation of customer funds.” Follow along for all the latest news and regular updates from the trial.\\n\\nWho was making decisions to spend $8 billion of customer funds? Bankman-Fried couldn’t recall knowing anything about it. Were there rules or requirements for how money borrowed from FTX would be returned? Were there rules for risk management? “I was concerned with overall risk management,” Bankman-Fried said.\\n\\nBut it was the testimony about June 2022 that resonated the most to me. Didn’t Bankman-Fried ask what “fiat@ftx” was? He did. But — I did hear these words uttered aloud in a court of law this morning, I am not creative enough to make this kind of thing up — his employees told him “they were busy and I should stop asking questions because it was distracting.”',\n", + " '“What conversations happened between him and his co-conspirators that are now cooperating against him?”\\n\\nThe second part is where all the drama is likely to come, says Christopher LaVigne, a litigation partner and co-chair of the cryptocurrency practice at the law firm Withers. Prosecutors have to connect those transactions to Bankman-Fried, show that he knew what he was doing was wrong, and prove that he lied about it anyway.\\n\\n“What was he saying to his parents and his other advisors about this?” LaVigne says. “What conversations happened between him and his co-conspirators that are now cooperating against him?”\\n\\nTo further establish intent, the government can use Bankman-Fried’s own words. The indictment calls Bankman-Fried’s tweets in November 2022 “false and misleading.”',\n", + " '“One year ago, it looked like the defendant was on the top of the world,” began Thane Rehn, a prosecutor for the government, in his opening statement. The former CEO of FTX oversaw a supposedly thriving crypto exchange, jetted between international locales, and hobnobbed with celebrities like Tom Brady and Larry David. He repeatedly emphasized to customers that their money was safe and secure.\\n\\nBut “all of that, all of it, was built on lies,” Rehn declared to the jury. “Behind the curtains, he was not what he appeared to be.” What followed was a roughly 30-minute story that repeatedly emphasized how Bankman-Fried allegedly stole customer funds to facilitate his jet-setting lifestyle, donate millions to political candidates, and finance risky bets.\\n\\nThe key to his alleged scheme? Alameda Research, a crypto hedge fund he also owned, argued Rehn. Using Caroline Ellison, his on-again off-again girlfriend and the CEO of Alameda, as a front, Bankman-Fried had “secret access” to customer money—both cash and crypto—the government claimed.'])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(reranked_rets.items())[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('Who is the individual associated with the cryptocurrency industry facing a criminal trial on fraud and conspiracy charges, as reported by both The Verge and TechCrunch, and is accused by prosecutors of committing fraud for personal gain?',\n", + " ['The trial of Sam Bankman-Fried is likely to be more consequential than just whether the man himself is found guilty. Depending on what evidence is introduced during the trial, it could be rough for the entire crypto industry.\\n\\n“How much damage can this trial do to the already beaten-down reputation of the industry at this point?” asks Yesha Yadav, a law professor at Vanderbilt University. “This trial is going to be an excruciating moment for the industry because no one knows what kind of evidence might come out.”\\n\\nBankman-Fried, the founder of FTX and Alameda Research, is facing seven counts of criminal charges: two counts of wire fraud, and five counts of conspiracy charges. FTX was a failed cryptocurrency exchange founded in 2019. According to a now-deleted profile from FTX investors Sequoia Capital, FTX was founded because of Bankman-Fried’s frustration with other exchanges when he was running Alameda Research, his crypto trading firm. According to the SEC, FTX was a fraud “from the start,” diverting customers’ funds to Alameda.',\n", + " 'Sam Bankman-Fried, the founder of failed cryptocurrency exchange FTX, is on trial for seven counts of wire fraud and conspiracy. FTX was a fraud “from the start,” the Securities and Exchange Commission alleges — with a “multi-billion-dollar deficiency caused by his own misappropriation of customer funds.” Follow along for all the latest news and regular updates from the trial.\\n\\nWho was making decisions to spend $8 billion of customer funds? Bankman-Fried couldn’t recall knowing anything about it. Were there rules or requirements for how money borrowed from FTX would be returned? Were there rules for risk management? “I was concerned with overall risk management,” Bankman-Fried said.\\n\\nBut it was the testimony about June 2022 that resonated the most to me. Didn’t Bankman-Fried ask what “fiat@ftx” was? He did. But — I did hear these words uttered aloud in a court of law this morning, I am not creative enough to make this kind of thing up — his employees told him “they were busy and I should stop asking questions because it was distracting.”',\n", + " '“What conversations happened between him and his co-conspirators that are now cooperating against him?”\\n\\nThe second part is where all the drama is likely to come, says Christopher LaVigne, a litigation partner and co-chair of the cryptocurrency practice at the law firm Withers. Prosecutors have to connect those transactions to Bankman-Fried, show that he knew what he was doing was wrong, and prove that he lied about it anyway.\\n\\n“What was he saying to his parents and his other advisors about this?” LaVigne says. “What conversations happened between him and his co-conspirators that are now cooperating against him?”\\n\\nTo further establish intent, the government can use Bankman-Fried’s own words. The indictment calls Bankman-Fried’s tweets in November 2022 “false and misleading.”',\n", + " '“One year ago, it looked like the defendant was on the top of the world,” began Thane Rehn, a prosecutor for the government, in his opening statement. The former CEO of FTX oversaw a supposedly thriving crypto exchange, jetted between international locales, and hobnobbed with celebrities like Tom Brady and Larry David. He repeatedly emphasized to customers that their money was safe and secure.\\n\\nBut “all of that, all of it, was built on lies,” Rehn declared to the jury. “Behind the curtains, he was not what he appeared to be.” What followed was a roughly 30-minute story that repeatedly emphasized how Bankman-Fried allegedly stole customer funds to facilitate his jet-setting lifestyle, donate millions to political candidates, and finance risky bets.\\n\\nThe key to his alleged scheme? Alameda Research, a crypto hedge fund he also owned, argued Rehn. Using Caroline Ellison, his on-again off-again girlfriend and the CEO of Alameda, as a front, Bankman-Fried had “secret access” to customer money—both cash and crypto—the government claimed.'])" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(disabled__cached_reranked_rets.items())[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['index', 'data'])" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vector_db.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "index = vector_db['index']\n", + "index.is_trained" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'190908e1-c9b7-416d-b2af-171e58137b54'" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = vector_db['data']\n", + "data[1][0]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "synthetic-qa-paper", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..01ed6ba --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +torch +transformers +datasets +langchain +langchain-community +langchain-huggingface +ragatouille +accelerate \ No newline at end of file