From 8238e41bcb8f1da5ee1dddc88113fe8d8e28c3ac Mon Sep 17 00:00:00 2001 From: Ryon Date: Tue, 17 Jan 2023 17:08:56 -0500 Subject: [PATCH 01/11] chat backend --- chat-backend/chat.py | 54 +++++++++++++++++++++++++++++++++++ chat-backend/requirements.txt | 5 ++++ chat-backend/server.py | 23 +++++++++++++++ chat-backend/web3fuctions.txt | 11 +++++++ 4 files changed, 93 insertions(+) create mode 100644 chat-backend/chat.py create mode 100644 chat-backend/requirements.txt create mode 100644 chat-backend/server.py create mode 100644 chat-backend/web3fuctions.txt diff --git a/chat-backend/chat.py b/chat-backend/chat.py new file mode 100644 index 0000000..71771d3 --- /dev/null +++ b/chat-backend/chat.py @@ -0,0 +1,54 @@ +from langchain.embeddings.openai import OpenAIEmbeddings +from langchain.vectorstores.faiss import FAISS +from langchain.llms import OpenAI +from langchain.prompts import PromptTemplate +from langchain.chains import LLMChain +import os + + +os.environ["OPENAI_API_KEY"] = "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" +OpenAI.api_key = "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" +embeddings = OpenAIEmbeddings() +with open('./web3fuctions.txt') as f: + web3functions = f.read() +# text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) +# instructions = text_splitter.split_text(ladle) +instructions = web3functions.split("---") +print(instructions) +docsearch = FAISS.from_texts(instructions, embeddings) + +template = ''' +You are a web3 assistant. You help users use web3 apps, such as Uniswap, AAVE, MakerDao, etc. You assist users in achieving their goals with these protocols, by providing users with relevant information, and creating transactions for users. + +To help users, an assistant may display information or dialog boxes using magic commands. Magic commands have the structure "<|command(parameter1, parameter2, ...)|>". 
When the assistant uses a command, users will see data, an interaction box, or other inline item, not the command. Users cannot use magic commands. + +Information to help complete your task: +{taskInfo} + +Information about the chat so far: +{summary} + +Chat History: +{history} +Assistant:''' + +prompt = PromptTemplate( + input_variables=["taskInfo", "summary", "history"], + template=template, +) +llm = OpenAI(temperature=0.9) +chain = LLMChain(llm=llm, prompt=prompt) +chain.verbose = True + +def chat(userinput, history): + docs = docsearch.similarity_search(userinput) + taskInfo = ''.join([doc.page_content for doc in docs]) + historyString = "" + history = history or [] + for line in history: + historyString += ("User: " + line[0] + "\n") + historyString += ("Assistant: " + line[1] + "\n") + historyString += ("User: " + userinput ) + result = chain.run({"taskInfo":taskInfo, "summary":"", "history":historyString, "stop":"User"}) + history.append((userinput, result)) + return result,history diff --git a/chat-backend/requirements.txt b/chat-backend/requirements.txt new file mode 100644 index 0000000..8c976ce --- /dev/null +++ b/chat-backend/requirements.txt @@ -0,0 +1,5 @@ +langchain==0.0.64 +gradio==3.16.2 +openai==0.26.1 +faiss-cpu==1.7.3 +websocket-server==0.6.4 diff --git a/chat-backend/server.py b/chat-backend/server.py new file mode 100644 index 0000000..78b9437 --- /dev/null +++ b/chat-backend/server.py @@ -0,0 +1,23 @@ +import logging +from websocket_server import WebsocketServer + +client_id_to_chat_history={} + +def new_client(client, server): + client_id_to_chat_history[client['id']]=[] + +def client_left(client, server): + client_id_to_chat_history.pop(client['id']) + +def message_received(client, server, message): + from chat import chat + r,h=chat(message,client_id_to_chat_history[client['id']]) + client_id_to_chat_history[client['id']] = h + server.send_message(client,r) + + +server = WebsocketServer(host='127.0.0.1', port=9999, loglevel=logging.INFO) 
+server.set_fn_new_client(new_client) +server.set_fn_message_received(message_received) +server.set_fn_client_left(client_left) +server.run_forever() diff --git a/chat-backend/web3fuctions.txt b/chat-backend/web3fuctions.txt new file mode 100644 index 0000000..052a11c --- /dev/null +++ b/chat-backend/web3fuctions.txt @@ -0,0 +1,11 @@ +Uniswap is a decentralized application for trading one token for another. To create a Uniswap transaction, a user must provide one of two possible combinations of information: +1.) A token to sell, a token to buy, and an amount to sell (the user can specify to sell everything in their wallet). (keyword is “SELLAMOUNT”) +2.) A token to sell, a token to buy, and an amount to buy (the user can specify to buy as much as possible given the amount of tokens they have to sell). (keyword is “BUYAMOUNT”) +--- +When a user has provided the requested information, you can provide them with a transaction completion box. For Uniswap, an assistant can trigger the transaction completion box by using the magic command “<|uniswap(TokenToSell, TokenToBuy, keyword, amount)|>”. For example, if a user wants to sell 5 WETH to buy USDC, an assistant should trigger the transaction completion box with “<|uniswap(“WETH”, “USDC”, “SELLAMOUNT”, 5.00)|>”. +--- +The user can request their balance of a token. The user’s balance of a token can be provided by an assistant using the magic command “<|balance(token)|>”. For example, “Your balance of USDC is <|balance(‘USDC’)|>” will cause the user’s USDC balance to be displayed. +--- +The user can request the current price of a token. An assistant can retrieve the price of a token using the magic command “<|price(baseToken, quoteToken)|>”. For example, if the user has requested the price of ETH in USDC, an assistant may answer with “The cost of ETH in USDC is <|price('ETH','USDC')|>”. +--- +MakerDao is a decentralized application for borrowing Dai, a dollar-pegged stablecoin.
To borrow Dai, a user must provide the following information: the collateral to provide, the amount of collateral provided, and the amount of Dai to borrow. When a user has provided the requested information, an assistant can provide them with a transaction completion box. For MakerDao, the transaction completion box can be triggered with "<|MakerDao(collateralToBorrow, CollateralAmount, AmountToBorrow)|>". For example, if a user wants to borrow 1000 Dai by providing 10 WETH, an assistant should trigger the transaction completion box with "You may complete the borrow using this dialog: <|MakerDao("WETH", 10, 1000)|>". From 774e778eaef3f2f53cb952ddadf0340d9d127841 Mon Sep 17 00:00:00 2001 From: Sagar Shah Date: Mon, 4 Sep 2023 19:16:34 -0500 Subject: [PATCH 02/11] wip --- .gitignore | 2 + .../c4/docs/awarding-process.json | 1 + .../c4/docs/backstage-wardens.json | 1 + .../c4/docs/certified-contributors.json | 1 + .../c4/docs/contest-process.json | 1 + .../knowledge_base/c4/docs/curve-logic.json | 1 + .../c4/docs/email-protection.json | 1 + .../c4/docs/fairness-and-validity.json | 1 + .../c4/docs/frequently-asked-questions.json | 1 + .../c4/docs/how-to-judge-a-contest.json | 1 + .../knowledge_base/c4/docs/how-we-work.json | 1 + .../c4/docs/incentive-model-and-awards.json | 1 + qa_bot/knowledge_base/c4/docs/index.json | 1 + .../c4/docs/intentionally-structured.json | 1 + qa_bot/knowledge_base/c4/docs/judges.json | 1 + .../c4/docs/judging-criteria.json | 1 + qa_bot/knowledge_base/c4/docs/lookouts.json | 1 + .../knowledge_base/c4/docs/our-process.json | 1 + .../c4/docs/qa-gas-report-faq.json | 1 + .../c4/docs/security-is-about-people.json | 1 + .../c4/docs/severity-categorization.json | 1 + .../knowledge_base/c4/docs/solo-audits.json | 1 + qa_bot/knowledge_base/c4/docs/sponsors.json | 1 + .../c4/docs/submission-policy.json | 1 + .../c4/docs/tools-and-resources.json | 1 + .../knowledge_base/c4/docs/warden-auth.json | 1 + qa_bot/knowledge_base/c4/docs/wardens.json | 1 +
.../c4/docs/where-can-i-find....json | 1 + .../c4/website/2023-01-reserve-contest.json | 1 + ...s-chain-services-ccip-and-arm-network.json | 1 + qa_bot/knowledge_base/c4/website/@0x52.json | 1 + qa_bot/knowledge_base/c4/website/@0xA5DF.json | 1 + .../knowledge_base/c4/website/@0xsomeone.json | 1 + .../c4/website/@HollaDieWaldfee.json | 1 + .../knowledge_base/c4/website/@IllIllI.json | 1 + qa_bot/knowledge_base/c4/website/@Jeiwan.json | 1 + qa_bot/knowledge_base/c4/website/@Trust.json | 1 + .../c4/website/@akshaysrivastav.json | 1 + .../knowledge_base/c4/website/@bin2chen.json | 1 + qa_bot/knowledge_base/c4/website/@cccz.json | 1 + .../knowledge_base/c4/website/@cmichel.json | 1 + .../knowledge_base/c4/website/@gpersoon.json | 1 + qa_bot/knowledge_base/c4/website/@gzeon.json | 1 + .../c4/website/@hansfriese.json | 1 + .../knowledge_base/c4/website/@hickuphh3.json | 1 + qa_bot/knowledge_base/c4/website/@hyh.json | 1 + qa_bot/knowledge_base/c4/website/@lambda.json | 1 + .../knowledge_base/c4/website/@leastwood.json | 1 + .../c4/website/@unforgiven.json | 1 + .../c4/website/@xiaoming90.json | 1 + qa_bot/knowledge_base/c4/website/bot.json | 1 + .../knowledge_base/c4/website/contests.json | 1 + qa_bot/knowledge_base/c4/website/help.json | 1 + .../c4/website/how-it-works.json | 1 + qa_bot/knowledge_base/c4/website/index.json | 1 + .../c4/website/leaderboard.json | 1 + .../c4/website/newsletter-signup.json | 1 + .../knowledge_base/c4/website/register.json | 1 + qa_bot/knowledge_base/c4/website/reports.json | 1 + .../c4/website/test-coverage.json | 1 + qa_bot/knowledge_base/c4/website/wardens.json | 1 + qa_bot/qa_bot.ipynb | 1698 +++++++++++++++++ 62 files changed, 1760 insertions(+) create mode 100644 qa_bot/knowledge_base/c4/docs/awarding-process.json create mode 100644 qa_bot/knowledge_base/c4/docs/backstage-wardens.json create mode 100644 qa_bot/knowledge_base/c4/docs/certified-contributors.json create mode 100644 qa_bot/knowledge_base/c4/docs/contest-process.json 
create mode 100644 qa_bot/knowledge_base/c4/docs/curve-logic.json create mode 100644 qa_bot/knowledge_base/c4/docs/email-protection.json create mode 100644 qa_bot/knowledge_base/c4/docs/fairness-and-validity.json create mode 100644 qa_bot/knowledge_base/c4/docs/frequently-asked-questions.json create mode 100644 qa_bot/knowledge_base/c4/docs/how-to-judge-a-contest.json create mode 100644 qa_bot/knowledge_base/c4/docs/how-we-work.json create mode 100644 qa_bot/knowledge_base/c4/docs/incentive-model-and-awards.json create mode 100644 qa_bot/knowledge_base/c4/docs/index.json create mode 100644 qa_bot/knowledge_base/c4/docs/intentionally-structured.json create mode 100644 qa_bot/knowledge_base/c4/docs/judges.json create mode 100644 qa_bot/knowledge_base/c4/docs/judging-criteria.json create mode 100644 qa_bot/knowledge_base/c4/docs/lookouts.json create mode 100644 qa_bot/knowledge_base/c4/docs/our-process.json create mode 100644 qa_bot/knowledge_base/c4/docs/qa-gas-report-faq.json create mode 100644 qa_bot/knowledge_base/c4/docs/security-is-about-people.json create mode 100644 qa_bot/knowledge_base/c4/docs/severity-categorization.json create mode 100644 qa_bot/knowledge_base/c4/docs/solo-audits.json create mode 100644 qa_bot/knowledge_base/c4/docs/sponsors.json create mode 100644 qa_bot/knowledge_base/c4/docs/submission-policy.json create mode 100644 qa_bot/knowledge_base/c4/docs/tools-and-resources.json create mode 100644 qa_bot/knowledge_base/c4/docs/warden-auth.json create mode 100644 qa_bot/knowledge_base/c4/docs/wardens.json create mode 100644 qa_bot/knowledge_base/c4/docs/where-can-i-find....json create mode 100644 qa_bot/knowledge_base/c4/website/2023-01-reserve-contest.json create mode 100644 qa_bot/knowledge_base/c4/website/2023-05-chainlink-cross-chain-services-ccip-and-arm-network.json create mode 100644 qa_bot/knowledge_base/c4/website/@0x52.json create mode 100644 qa_bot/knowledge_base/c4/website/@0xA5DF.json create mode 100644 
qa_bot/knowledge_base/c4/website/@0xsomeone.json create mode 100644 qa_bot/knowledge_base/c4/website/@HollaDieWaldfee.json create mode 100644 qa_bot/knowledge_base/c4/website/@IllIllI.json create mode 100644 qa_bot/knowledge_base/c4/website/@Jeiwan.json create mode 100644 qa_bot/knowledge_base/c4/website/@Trust.json create mode 100644 qa_bot/knowledge_base/c4/website/@akshaysrivastav.json create mode 100644 qa_bot/knowledge_base/c4/website/@bin2chen.json create mode 100644 qa_bot/knowledge_base/c4/website/@cccz.json create mode 100644 qa_bot/knowledge_base/c4/website/@cmichel.json create mode 100644 qa_bot/knowledge_base/c4/website/@gpersoon.json create mode 100644 qa_bot/knowledge_base/c4/website/@gzeon.json create mode 100644 qa_bot/knowledge_base/c4/website/@hansfriese.json create mode 100644 qa_bot/knowledge_base/c4/website/@hickuphh3.json create mode 100644 qa_bot/knowledge_base/c4/website/@hyh.json create mode 100644 qa_bot/knowledge_base/c4/website/@lambda.json create mode 100644 qa_bot/knowledge_base/c4/website/@leastwood.json create mode 100644 qa_bot/knowledge_base/c4/website/@unforgiven.json create mode 100644 qa_bot/knowledge_base/c4/website/@xiaoming90.json create mode 100644 qa_bot/knowledge_base/c4/website/bot.json create mode 100644 qa_bot/knowledge_base/c4/website/contests.json create mode 100644 qa_bot/knowledge_base/c4/website/help.json create mode 100644 qa_bot/knowledge_base/c4/website/how-it-works.json create mode 100644 qa_bot/knowledge_base/c4/website/index.json create mode 100644 qa_bot/knowledge_base/c4/website/leaderboard.json create mode 100644 qa_bot/knowledge_base/c4/website/newsletter-signup.json create mode 100644 qa_bot/knowledge_base/c4/website/register.json create mode 100644 qa_bot/knowledge_base/c4/website/reports.json create mode 100644 qa_bot/knowledge_base/c4/website/test-coverage.json create mode 100644 qa_bot/knowledge_base/c4/website/wardens.json create mode 100644 qa_bot/qa_bot.ipynb diff --git a/.gitignore b/.gitignore 
index 485dee6..0a7a692 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ .idea +.env +.ipynb_checkpoints \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/awarding-process.json b/qa_bot/knowledge_base/c4/docs/awarding-process.json new file mode 100644 index 0000000..31bb1c3 --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/awarding-process.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/awarding/incentive-model-and-awards/awarding-process", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Awarding process](/awarding/incentive-model-and-awards/awarding-process)\n\n[Curve logic for QA and Gas optimization reports](/awarding/incentive-model-\nand-awards/curve-logic)\n\n[FAQ about QA and Gas Reports](/awarding/incentive-model-and-awards/qa-gas-\nreport-faq)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Awarding process\n\nThis is a high level overview of the C4 awarding process.\n\n##\n\nAwarding process\n\nAt the conclusion of an audit, sponsors review wardens' findings and express\ntheir opinions with regard to severity of issues. 
Judges evaluate input from\nboth and make the ultimate decision in terms of severity and validity of\nissues. (See [How to judge an audit](/roles/judges/how-to-judge-a-contest) for\nmore detail.)\n\nIn making their determination, judges add labels to Github issues, while the\noriginal submission data (including the warden's proposed severity rating) is\npreserved via a JSON data file.\n\nThe judge's decisions are reviewed by the sponsoring project team and by\n[+backstage wardens](https://docs.code4rena.com/roles/certified-\ncontributors/backstage-wardens) via a 48-hour QA process, to ensure fairness\nand quality.\n\nJudging data is used to generate the awards using Code4rena's award\ncalculation script, which factors in:\n\n * Risk level\n\n * Validity\n\n * Number of duplicates\n\n * Grade (A, B, C; Satisfactory/Unsatisfactory)\n\n * In some cases, \"partial duplicate\" status\n\nIt should be possible to reverse engineer awards using a combination of two\nCSV files:\n\n * \u200b[`findings.csv`](https://code4rena.com/community-resources/findings.csv): valid Code4rena findings\n\n * \u200b[`contests.csv`](https://code4rena.com/community-resources/contests.csv): Code4rena audits\n\nOnce awards are determined, we generate a CSV file enumerating funds to be\nsent. Distribution is then initiated using disperse.app and sent to multisig\nsigners for completion of payment.\n\n###\n\nIf you don't see your award in your wallet:\n\nFor most Code4rena contests, awards are sent on the Polygon network (usually\nin USDC) within 1-2 weeks of the awards announcement.\n\nIf awards have been sent, and you don't see them in your wallet, please verify\nthat you have imported the USDC token into your Polygon wallet. 
(MetaMask\ninstructions [here](https://support.metamask.io/hc/en-\nus/articles/360015489031-How-to-display-tokens-in-MetaMask).)\n\nIf you still don't see the award in your wallet, please [open a help desk\nticket](https://code4rena.com/help).\n\n##\n\nTax and legal questions\n\n> _Note: Do your own research; this is not legal or tax advice. You should\n> consult a professional in your area._\n\nWe are occasionally asked how wardens should declare Code4rena earnings for\ntax (or other financial/legal) purposes. Due to the nascent nature of DAOs, we\nare unable to provide reliable information in this area. You must assess and\ndetermine your own best course of action.\n\nAudit contest rewards are distributed by the DAO, which does not have a legal\npersonality.\n\nThe DAO has designated Code4rena Foundation as its agent via [a governance\naction](https://github.com/code-423n4/org/discussions/13) [approved by DAO\nmembers](https://polygonscan.com/tx/0x8fbe178e34a7ae03a5e0d1f49f23e38f3a1c0d1186a67920d33196a89f79da98)\nfor purposes of entering into contractual agreements. However, wardens are not\nin any contractual agreement with the Foundation [unless they are\ncertified](https://code4rena.com/certified-contributor-summary/).\n\n\u200b[Documentation of Code4rena Foundation can be found\nhere.](https://github.com/code-423n4/org/tree/main/foundation)\u200b\n\nCode4rena Foundation may or may not be able to provide further information;\ntheir contact information is below.\n\nCode4rena Foundation\n\n\u200b[Campbell Law](/cdn-cgi/l/email-\nprotection#55363438253730393915263c39233027263c313038343b34323038303b217b3e2c)\u200b\n\nP.O. 
Box 31489 2nd Floor Whitehall House 238 North Church Street George Town\nCayman Islands KY1-1206\n\n[Awarding - PreviousIncentive model and awards](/awarding/incentive-model-and-\nawards)[NextCurve logic for QA and Gas optimization\nreports](/awarding/incentive-model-and-awards/curve-logic)\n\nLast modified 4d ago\n\nOn this page\n\nAwarding process\n\nIf you don't see your award in your wallet:\n\nTax and legal questions\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/backstage-wardens.json b/qa_bot/knowledge_base/c4/docs/backstage-wardens.json new file mode 100644 index 0000000..397c753 --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/backstage-wardens.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/roles/certified-contributors/backstage-wardens", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\n[+Backstage wardens](/roles/certified-contributors/backstage-wardens)\n\n[Lookouts](/roles/certified-contributors/lookouts)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find \u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# +Backstage wardens\n\nCertified contributors who meet certain performance criteria within C4 
gain\n\"+Backstage\" access to C4 audits, which includes:\n\n * Immediate access to findings repos after audits conclude;\n\n * Contributing to post-audit triage; and\n\n * Post-judging QA.\n\nThe minimum criteria to become +Backstage are as follows:\n\n 1. 1.\n\nBe approved as a Certified C4 contributor;\n\n 2. 2.\n\nParticipate as a warden in at least 3 Code4rena audits;\n\n 3. 3.\n\nHave at least 1 high severity finding OR 3 medium severity findings on the\n[Code4rena leaderboard](https://code4rena.com/leaderboard/), OR score A on a\nQA or Gas report;\n\n 4. 4.\n\nAbide by the Certified Contributor Terms and Conditions (see [application\nform](https://code4rena.com/certified-contributor-application/)).\n\n##\n\nTo request +Backstage access\n\nOnce you meet the eligibility criteria, submit a [Help Desk\nRequest](https://code4rena.com/help/) to request +Backstage access, and C4\nstaff will get you set up.\n\n##\n\nCertified contributor professional conduct guidelines\n\nContributors may lose their +Backstage role by violating the code of\nprofessional conduct as outlined in the certified contributor agreement. 
This\ncode asks wardens to:\n\n * take an objective, collegial, and intellectually open tone in considering and discussing all findings\n\n * treat wardens and sponsors, and all other Code4rena community members with respect and an assumption of positive intent\n\n * avoid engaging in any discussion and evaluation of issues they submitted themselves except to answer a question or provide additional context requested by a judge or sponsor\n\n * treat the contents of all findings as private and confidential until the audit report is made public.\n\n[Roles - PreviousCertified contributors](/roles/certified-\ncontributors)[NextLookouts](/roles/certified-contributors/lookouts)\n\nLast modified 2mo ago\n\nOn this page\n\nTo request +Backstage access\n\nCertified contributor professional conduct guidelines\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/certified-contributors.json b/qa_bot/knowledge_base/c4/docs/certified-contributors.json new file mode 100644 index 0000000..b70ba26 --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/certified-contributors.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/roles/certified-contributors", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\n[+Backstage wardens](/roles/certified-contributors/backstage-wardens)\n\n[Lookouts](/roles/certified-contributors/lookouts)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther 
Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find \u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Certified contributors\n\nIn order to create opportunities for contributions which rely on establishment\nof trust, Code4rena allows community members to opt into certifying their\nidentity and entering into a simple agreement.\n\nContributors who have provided ID verification and a signed agreement may be\neligible to participate in:\n\n * Private or invite-only contests\n\n * Scout role (focused on scoping and pre-contest code intel)\n\n * \u200b[Judging](/roles/judges)\u200b\n\n * \u200b[\"Backstage\" warden opportunities](/roles/certified-contributors/backstage-wardens) (post-contest triage and post-judging QA)\n\n * Providing mitigation review services\n\n * Offering solo audit and consulting services through C4\n\nAdditional opportunities we are considering include:\n\n * Certain contest bonus token awards which may be restricted from US persons due to regulations or token grant agreements\n\n * May be a factor in maxing out awards in the future\n\n###\n\n **Certification process and constraints**\n\nC4 continues to focus on privacy, so our certification process is done through\na third party ([Provenance](https://provenancecompliance.com/)) that is bound\nby confidentiality. The certification process is as follows:\n\n 1. 1.\n\nAn eligible contributor submits the [Certified Contributor Application\nform](https://code4rena.com/certified-contributor-application/), and agrees to\nthe Certified Contributor Terms and Conditions (see the application form).\n\n 2. 2.\n\nThe DAO's AML/KYC agent, [Provenance](https://provenancecompliance.com/),\ncontacts the contributor to certify their identity.\n\n 3. 
3.\n\nProvenance certifies a contributor as having completed their identity\nverification process, and having signed an agreement binding them to code of\nconduct and non-disclosure. Code4 Corporation and the Code4rena DAO do NOT\nhave access to personal information, simply the verified knowledge that the\ncontributor was certified.\n\n 4. 4.\n\nCode4 Corporation contacts the certified contributor to let them know their\napplication has been approved.\n\n####\n\nConstraints\n\n * In the event that a certified contributor was alleged to have violated the agreement, the Code4rena Cayman Foundation could hire an attorney to pursue remediation.\n\n * In the event a certified C4 contributor was alleged to be involved in an exploit, Provenance would provide identifying information to authorities.\n\n##\n\nFAQ\n\n####\n\nWhat is the purpose behind having an option for wardens to become certified,\nand why does ID verification factor into that?\n\nAside from bug reports, most security services are not trustless activities,\nand there really isn't a way to make them so without adding immensely\nunworkable complexity (who audits the auditors?) or putting disproportionate\nburden on trusted parties (\"sorry, cmichel and alex, for the sake of C4, you\nhave to verify this anon advice on this bugfix is legit and not elaborate\nsocial engineering\")\n\nThe wider the surface area of interaction with anonymous wardens, the more we\nneed to manage the complexity of both trust and sybil attacks. Keeping\nabsolute anonymity limited to bug reports is where the line can be comfortably\ndrawn.\n\nBut even beyond our own comfort, ultimately a C4 audit is a _productized\nservice_ and sponsors are _customers_. Customers of security products and\nservices generally have higher level of anxiety around trust--and justifiably\neven more so in the dark forest of smart contract security. 
This approach\nprovides a path for accountability in the extremely unlikely event of\nmalicious behavior, yes, but it also provides a way for us to know that\nwardens have verifiably signed an agreement regarding non-disclosure,\nprofessionalism, and respect for privileged information from sponsors that\nthey presumably intend to honor.\n\nNote that certified wardens are still able to be anon for basically all\nintents and purposes. Unless the shit hit the fan, the C4 DAO wouldn't know\nwho they were, the Foundation wouldn't know who they were, Code4 Corp wouldn't\nknow who they were--just Provenance and attorneys who'd be required to keep\nthat information confidential.\n\n####\n\nWhat would constitute a warden being alleged to have performed an attack?\n\nFirst, let's define \"allegation\": an accusation based on circumstantial\nevidence would not be sufficient, as Provenance (the certifier) would have a\nduty to keep personal information confidential. There would need to be a\nformal, legal allegation wherein Provenance was **subpoenaed** to provide the\ninformation as verification of identity.\n\nSo this is really only relevant in a case where there is sufficient evidence\nthat would stand up in a court of law clearly demonstrating that the specific\nwarden in question was involved in an attack. Provenance would not dox wardens\nat the request of people grasping at straws in a witch hunt.\n\n####\n\nWhat documents are accepted by Provenance in relation to proof of residence?\n\nProvenance will provide exact details after application submission. As of June\n1, 2022, the list of acceptable documents includes:\n\n * Utility bill clearly stating the service address and mailing address with the individual's name (note: telephone, cellular and credit card bills are not acceptable as these may be mailed to any address)\n\n * Bank statement\n\n * Rental or lease agreement\n\n * Local authority document (e.g. 
property tax bill, council tax bill etc.)\n\nNote: the document has to be less than 3 months old.\n\n####\n\nHow long does it take to get certified?\n\nOnce you submit the [Certified Contributor Application\nform](https://code4rena.com/certified-contributor-application/), Provenance\ntypically emails you within one business day. (If you don't see an email\nwithin that time frame, we recommend checking your spam folder for email from\n`@provenance.company` or `provenancecompliance.com`.)\n\nProvenance will provide you with detailed instructions. If you have all the\navailable documents, the process can usually be completed within a day.\n\nIt will take longer if you need to assemble the necessary documents.\n\n[PreviousHow to judge an audit](/roles/judges/how-to-judge-a-\ncontest)[Next+Backstage wardens](/roles/certified-contributors/backstage-\nwardens)\n\nLast modified 3d ago\n\nOn this page\n\nCertification process and constraints\n\nFAQ\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/contest-process.json b/qa_bot/knowledge_base/c4/docs/contest-process.json new file mode 100644 index 0000000..860b83f --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/contest-process.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/roles/sponsors/contest-process", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Audit process](/roles/sponsors/contest-process)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally 
structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Audit process\n\n###\n\nBefore the audit\n\n * If you haven't already, join the [C4 Discord](https://discord.gg/YgBwyreF9B) server and let us know you're interested in sponsoring an audit in the `#\ud83d\udcbci-want-c4-to-audit-our-code` channel.\n\n * We'll ask you to share your current smart contracts and answer a few questions about the scope you'd like wardens to focus on. If you decide to move ahead with an audit, **the relevant code will be made public** at the time of your audit.\n\n * After your code has been reviewed, Code4rena staff will contact you to iron out the details.\n\n * Once we've received a deposit, we will finalize scheduling and begin to promote the audit.\n\n###\n\nDuring the audit\n\n * Be prepared for a **code freeze for the duration of the audit** -- important because it establishes a level playing field. We want to ensure everyone's looking at the same code, no matter when they look during the audit. 
(Note: this includes your own repo, since a PR can leak alpha to our wardens!)\n\n * We ask for a member or members of your engineering team to be available in the C4 Discord server in order to answer wardens' questions via DM.\n\n * Please avoid discussing any issues submitted by wardens in an open channel, as this could give hints to other wardens.\n\n###\n\nAfter the audit\n\nYour work will play a role in developing a public report of the audit.\n\n * Sponsors review findings, help identify duplicates, and provide comments as you confirm, acknowledge, or dispute wardens' findings.\n\n * As your team works to mitigate issues, most sponsors will create a PR for each issue addressed in your codebase and link to it in the C4 finding issue and label the finding as resolved.\n\n[Roles - PreviousSponsors](/roles/sponsors)[Next \\-\nRolesJudges](/roles/judges)\n\nLast modified 2mo ago\n\nOn this page\n\nBefore the audit\n\nDuring the audit\n\nAfter the audit\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/curve-logic.json b/qa_bot/knowledge_base/c4/docs/curve-logic.json new file mode 100644 index 0000000..1abf104 --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/curve-logic.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/awarding/incentive-model-and-awards/curve-logic", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Awarding process](/awarding/incentive-model-and-awards/awarding-process)\n\n[Curve logic for QA and Gas optimization reports](/awarding/incentive-model-\nand-awards/curve-logic)\n\n[FAQ about QA and Gas Reports](/awarding/incentive-model-and-awards/qa-gas-\nreport-faq)\n\n[Judging 
criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Curve logic for QA and Gas optimization reports\n\nThe reports will be graded based on 3 different grades:\n\n * Grade-a: outstanding report.\n\n * Grade-b: satisfactory report.\n\n * Grade-c: unsatisfactory report.\n\nEach grade will be allocated a portion of the pool, with a decrementer of 0.6\nbetween, and steps of 0.2.\n\n####\n\nWhat happens if there are tied report scores?\n\nIf two or more QA (or gas optimization) reports have tied scores, they split\nthe _total_ awards for the slots they would otherwise occupy -- i.e. if two\nwardens tie for 3rd place, they split the total awards for 3rd and 4th place.\nOr if three wardens tie for 3rd, they split the total awards for 3rd, 4th, and\n5th place.\n\n####\n\nWhat if there are no high or medium severity findings?\n\nTotal findings pool will be split based on QA report scores unless other\narrangements are made.\n\n####\n\nCan I see some examples of how awards work?\n\nAwards for each contest are [posted on the Code4rena\nwebsite](https://code4rena.com/contests). 
See\n[Numoen](https://code4rena.com/contests/2023-01-numoen-findings), for example.\nThe award calculation for Numoen had the following parameters:\n\n * **Total awards: 50,000 USDC**\n\n * Main award pool: 42,500 USDC\n\n * QA pool: 5,000 USDC\n\n * Gas pool: 2,500 USDC\n\nThe table below shows each unique high and medium severity finding (`H-XX`,\n`M-XX`), QA report (`Q-XX`), gas optimization report (`G-XX`), and the way\neach submission's award was calculated:\n\n * `pie` is the number of shares assigned to that report or finding\n\n * `split` is the number of times those shares were divided\n\n * `slice` is the number of shares assigned for that warden's finding\n\n * each `award` is calculated by `shares * (pot / number_of_shares)`\n\n **Tribe Turbo awards**\n\n **handle**\n\n|\n\n **finding**\n\n|\n\n **risk**\n\n|\n\n **pie**\n\n|\n\n **split**\n\n|\n\n **slice**\n\n|\n\n **award** \n \n---|---|---|---|---|---|--- \n \n'hansfriese'\n\n|\n\n'H-01'\n\n|\n\n'3'\n\n|\n\n13\n\n|\n\n1\n\n|\n\n13\n\n|\n\n17615.514252816203 \n \n'RaymondFam'\n\n|\n\n'M-01'\n\n|\n\n'2'\n\n|\n\n2.0863980000000004\n\n|\n\n5\n\n|\n\n0.5117580000000002\n\n|\n\n693.4523340763628 \n \n'0xhacksmithh'\n\n|\n\n'M-01'\n\n|\n\n'2'\n\n|\n\n2.0863980000000004\n\n|\n\n5\n\n|\n\n0.39366000000000007\n\n|\n\n533.424872366433 \n \n'Deivitto'\n\n|\n\n'M-01'\n\n|\n\n'2'\n\n|\n\n2.0863980000000004\n\n|\n\n5\n\n|\n\n0.39366000000000007\n\n|\n\n533.424872366433 \n \n'rvierdiiev'\n\n|\n\n'M-01'\n\n|\n\n'2'\n\n|\n\n2.0863980000000004\n\n|\n\n5\n\n|\n\n0.39366000000000007\n\n|\n\n533.424872366433 \n \n'peakbolt'\n\n|\n\n'M-01'\n\n|\n\n'2'\n\n|\n\n2.0863980000000004\n\n|\n\n5\n\n|\n\n0.39366000000000007\n\n|\n\n533.424872366433 \n \n'hansfriese'\n\n|\n\n'M-02'\n\n|\n\n'2'\n\n|\n\n3.9\n\n|\n\n1\n\n|\n\n3.9000000000000004\n\n|\n\n5284.654275844861 \n \n'Allarious'\n\n|\n\n'M-03'\n\n|\n\n'2'\n\n|\n\n3.9\n\n|\n\n1\n\n|\n\n3.9000000000000004\n\n|\n\n5284.654275844861 \n 
\n'hansfriese'\n\n|\n\n'M-04'\n\n|\n\n'2'\n\n|\n\n3.1050000000000004\n\n|\n\n2\n\n|\n\n1.7550000000000001\n\n|\n\n2378.0944241301877 \n \n'peakbolt'\n\n|\n\n'M-05'\n\n|\n\n'2'\n\n|\n\n2.268\n\n|\n\n3\n\n|\n\n1.0530000000000002\n\n|\n\n1426.8566544781127 \n \n'nadin'\n\n|\n\n'M-04'\n\n|\n\n'2'\n\n|\n\n3.1050000000000004\n\n|\n\n2\n\n|\n\n1.35\n\n|\n\n1829.3034031770674 \n \n'adeolu'\n\n|\n\n'M-05'\n\n|\n\n'2'\n\n|\n\n2.268\n\n|\n\n3\n\n|\n\n0.405\n\n|\n\n548.7910209531202 \n \n'rvierdiiev'\n\n|\n\n'M-05'\n\n|\n\n'2'\n\n|\n\n2.268\n\n|\n\n3\n\n|\n\n0.81\n\n|\n\n1097.5820419062404 \n \n'Breeje'\n\n|\n\n'M-06'\n\n|\n\n'2'\n\n|\n\n3.1050000000000004\n\n|\n\n2\n\n|\n\n1.35\n\n|\n\n1829.3034031770674 \n \n'ladboy233'\n\n|\n\n'M-06'\n\n|\n\n'2'\n\n|\n\n3.1050000000000004\n\n|\n\n2\n\n|\n\n1.7550000000000001\n\n|\n\n2378.0944241301877 \n \n'Deivitto'\n\n|\n\n'G-01'\n\n|\n\n'g'\n\n|\n\n86.5490783392284\n\n|\n\n15\n\n|\n\n5.76993855594856\n\n|\n\n45.42556528683028 \n \n'Aymen0909'\n\n|\n\n'G-02'\n\n|\n\n'g'\n\n|\n\n86.5490783392284\n\n|\n\n15\n\n|\n\n5.76993855594856\n\n|\n\n45.42556528683028 \n \n'matrix_0wl'\n\n|\n\n'G-03'\n\n|\n\n'g'\n\n|\n\n86.5490783392284\n\n|\n\n15\n\n|\n\n5.76993855594856\n\n|\n\n45.42556528683028 \n \n'RaymondFam'\n\n|\n\n'G-04'\n\n|\n\n'g'\n\n|\n\n86.5490783392284\n\n|\n\n15\n\n|\n\n5.76993855594856\n\n|\n\n45.42556528683028 \n \n'c3phas'\n\n|\n\n'G-05'\n\n|\n\n'g'\n\n|\n\n140\n\n|\n\n2\n\n|\n\n70\n\n|\n\n551.0959153628928 \n \n'Rageur'\n\n|\n\n'G-06'\n\n|\n\n'g'\n\n|\n\n86.5490783392284\n\n|\n\n15\n\n|\n\n5.76993855594856\n\n|\n\n45.42556528683028 \n \n'nadin'\n\n|\n\n'G-07'\n\n|\n\n'g'\n\n|\n\n86.5490783392284\n\n|\n\n15\n\n|\n\n5.76993855594856\n\n|\n\n45.42556528683028 \n \n'IllIllI'\n\n|\n\n'G-08'\n\n|\n\n'g'\n\n|\n\n140\n\n|\n\n2\n\n|\n\n70\n\n|\n\n551.0959153628928 \n \n'cryptostellar5'\n\n|\n\n'G-09'\n\n|\n\n'g'\n\n|\n\n86.5490783392284\n\n|\n\n15\n\n|\n\n5.76993855594856\n\n|\n\n45.42556528683028 \n 
\n'Diana'\n\n|\n\n'G-10'\n\n|\n\n'g'\n\n|\n\n86.5490783392284\n\n|\n\n15\n\n|\n\n5.76993855594856\n\n|\n\n45.42556528683028 \n \n'antonttc'\n\n|\n\n'G-11'\n\n|\n\n'g'\n\n|\n\n86.5490783392284\n\n|\n\n15\n\n|\n\n5.76993855594856\n\n|\n\n45.42556528683028 \n \n'0xackermann'\n\n|\n\n'G-12'\n\n|\n\n'g'\n\n|\n\n86.5490783392284\n\n|\n\n15\n\n|\n\n5.76993855594856\n\n|\n\n45.42556528683028 \n \n'0xSmartContract'\n\n|\n\n'G-13'\n\n|\n\n'g'\n\n|\n\n86.5490783392284\n\n|\n\n15\n\n|\n\n5.76993855594856\n\n|\n\n45.42556528683028 \n \n'ReyAdmirado'\n\n|\n\n'G-14'\n\n|\n\n'g'\n\n|\n\n86.5490783392284\n\n|\n\n15\n\n|\n\n5.76993855594856\n\n|\n\n45.42556528683028 \n \n'NoamYakov'\n\n|\n\n'G-15'\n\n|\n\n'g'\n\n|\n\n91\n\n|\n\n1\n\n|\n\n91\n\n|\n\n716.4246899717607 \n \n'Rolezn'\n\n|\n\n'G-16'\n\n|\n\n'g'\n\n|\n\n86.5490783392284\n\n|\n\n15\n\n|\n\n5.76993855594856\n\n|\n\n45.42556528683028 \n \n'oyc_109'\n\n|\n\n'G-17'\n\n|\n\n'g'\n\n|\n\n86.5490783392284\n\n|\n\n15\n\n|\n\n5.76993855594856\n\n|\n\n45.42556528683028 \n \n'arialblack14'\n\n|\n\n'G-18'\n\n|\n\n'g'\n\n|\n\n86.5490783392284\n\n|\n\n15\n\n|\n\n5.76993855594856\n\n|\n\n45.42556528683028 \n \n'matrix_0wl'\n\n|\n\n'Q-01'\n\n|\n\n'q'\n\n|\n\n38.296345887055885\n\n|\n\n5\n\n|\n\n7.659269177411177\n\n|\n\n142.48406332285143 \n \n'SleepingBugs'\n\n|\n\n'Q-02'\n\n|\n\n'q'\n\n|\n\n38.296345887055885\n\n|\n\n5\n\n|\n\n7.659269177411177\n\n|\n\n142.48406332285143 \n \n'CodingNameKiki'\n\n|\n\n'Q-03'\n\n|\n\n'q'\n\n|\n\n69.68\n\n|\n\n1\n\n|\n\n69.68\n\n|\n\n1296.2450205584805 \n \n'IllIllI'\n\n|\n\n'Q-04'\n\n|\n\n'q'\n\n|\n\n160.8\n\n|\n\n3\n\n|\n\n53.6\n\n|\n\n997.1115542757543 \n \n'0xAgro'\n\n|\n\n'Q-05'\n\n|\n\n'q'\n\n|\n\n38.296345887055885\n\n|\n\n5\n\n|\n\n7.659269177411177\n\n|\n\n142.48406332285143 \n \n'0xSmartContract'\n\n|\n\n'Q-06'\n\n|\n\n'q'\n\n|\n\n160.8\n\n|\n\n3\n\n|\n\n53.6\n\n|\n\n997.1115542757543 \n \n'btk'\n\n|\n\n'Q-07'\n\n|\n\n'q'\n\n|\n\n160.8\n\n|\n\n3\n\n|\n\n53.6\n\n|\n\n997.1115542757543 \n 
\n'chrisdior4'\n\n|\n\n'Q-08'\n\n|\n\n'q'\n\n|\n\n38.296345887055885\n\n|\n\n5\n\n|\n\n7.659269177411177\n\n|\n\n142.48406332285143 \n \n'Rolezn'\n\n|\n\n'Q-09'\n\n|\n\n'q'\n\n|\n\n38.296345887055885\n\n|\n\n5\n\n|\n\n7.659269177411177\n\n|\n\n142.48406332285143 \n \n[PreviousAwarding process](/awarding/incentive-model-and-awards/awarding-\nprocess)[NextFAQ about QA and Gas Reports](/awarding/incentive-model-and-\nawards/qa-gas-report-faq)\n\nLast modified 3mo ago\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/email-protection.json b/qa_bot/knowledge_base/c4/docs/email-protection.json new file mode 100644 index 0000000..d0e930b --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/email-protection.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/cdn-cgi/l/email-protection", "md_content": "Please enable cookies.\n\n# Email Protection\n\n## You are unable to access this email address docs.code4rena.com\n\nThe website from which you got to this page is protected by Cloudflare. Email\naddresses on that page have been hidden in order to keep them from being\naccessed by malicious bots. 
**You must enable Javascript in your browser in\norder to decode the e-mail address**.\n\nIf you have a website and are interested in protecting it in a similar way,\nyou can [sign up for Cloudflare](https://www.cloudflare.com/sign-\nup?utm_source=email_protection).\n\n * [How does Cloudflare protect email addresses on website from spammers?](https://support.cloudflare.com/hc/en-us/articles/200170016-What-is-Email-Address-Obfuscation-)\n * [Can I sign up for Cloudflare?](https://support.cloudflare.com/hc/en-us/categories/200275218-Getting-Started)\n\nCloudflare Ray ID: **8019948b8fc542c8** \u2022 Your IP: Click to reveal\n143.244.47.100 \u2022 Performance & security by\n[Cloudflare](https://www.cloudflare.com/5xx-error-landing)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/fairness-and-validity.json b/qa_bot/knowledge_base/c4/docs/fairness-and-validity.json new file mode 100644 index 0000000..a8df1fa --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/fairness-and-validity.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/awarding/fairness-and-validity", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered 
By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Fairness and validity\n\nFairness, validity, and consistency\n\n##\n\nFundamental principles\n\nThese are the fundamental principles that underly how we look at the question\nof \"fairness\" in Code4rena.\n\n 1. 1.\n\nCode4rena aims to be a fair and impartial system.\n\n 2. 2.\n\nWhere the system is insufficient or vague, we depend on the judgment of fair\nand impartial individuals.\n\n 3. 3.\n\nWhen we depend too heavily on the judgment of individuals, we work to improve\nthe system long-term in iterative and sustainable ways.\n\n 4. 4.\n\nBecause we are working every day within the constraints of the systems we\nhave, we aim to be patient with the time and consideration that improvement\ntakes and gracious toward the individuals tasked by the system with making\ndifficult decisions.\n\nIt may be worth reading [this longer\npiece](https://github.com/code-423n4/org/discussions/36) on the topic of how\nour system has evolved.\n\n##\n\nExpectations of participants\n\n * Sponsors should be able to trust that Code4rena as a system is working to help them secure their code and that their funds are a good investment toward that end.\n\n * Wardens should be able to have clear rule expectations of contests they contribute to--as clear as possible within the constraints.\n\n * Judges should be impartial and free to act independently to do what they see best in a given contest within the guidelines they are provided.\n\n##\n\nRole of hired staff (Code4 Corporation)\n\nThe role of staff is regulatory, supportive, and administrative:\n\n * Code4 is a neutral party in contests dedicated to serving and collaborating with all sides of the market in driving the success of Code4rena as a platform.\n\n * Code4 is responsible for improving documentation, process, and tools in support of the goals and expectations of each of the parties involved in Code4rena, providing 
information, context, and guidance to sponsors, judges, and wardens within the rules.\n\n * Code4 has no role in determining the outcomes of findings and does not put its hand on the scale in individual contests.\n\n * Code4 does have a role to provide sponsors, judges, and wardens with historical context on the intent of rules so that those rules can be applied appropriately when ambiguity is present.\n\n##\n\nWhat constitutes a 'valid' report'?\n\nThe validity of an audit report submission is not based on whether it is\n'true' or not. A report may contain a finding which is factually 'true' (the\nmost literal interpretation of 'valid'), but if it does not add value or if it\nis not presented in such a way that adds value to a sponsor, it may be deemed\ninvalid by a judge.\n\nThis may seem harsh and exclusive, but it is essential to consider that\nCode4rena runs audit contests, not gotcha-hunts, and Code4rena offers\nguaranteed payout for valid submissions. This means that wardens are providing\na service to sponsors and the product of those services should meet what\njudges feel is a minimum standard in order to be deemed of value.\n\nAuditing is serious, disciplined work that should provide high value\nconsultative expertise to the people paying for the work.\n\nIn that light, judges are right to have high standards. 
Some judges have\nalways had higher standards than others, and some judges have applied higher\nstandards in later contests than they did in earlier ones.\n\nWhile this may be seen as 'inconsistent', it is also true that standards\nwithin a specific contest will always be informed by the overall quality of a\ncontest's submissions, and that the standard in a judge's mind is always going\nto be evolving based on the aggregate quality of submissions that judge has\nbeen exposed to and the decisions other judges have made.\n\nThe correct assessment when this happens is not that a judge is being\ninconsistent, it is that they have objectively observed that the quality of\ncompetition has increased, and that observation shapes their view of the whole\nset of submissions; they are consistent in valuing submissions in the context\nof each other, which is a central way that performance in a competition is\nmeasured.\n\n##\n\nIf you disagree with a judge's decision\n\nIf you disagree with a decision, and you do not have [the +backstage\nrole](https://docs.code4rena.com/roles/certified-contributors/backstage-\nwardens), there's nothing further that can be done or changed; the judge's\ndecisions are final.\n\nHowever, if the concern regarding judging is focused on a matter of\ninconsistency or process or lack of clarity in the rules, you are encouraged\nto review the issues in https://github.com/code-423n4/org/issues and:\n\n 1. 1.\n\nSee if one of the problems described there matches the type of issue you have\nexperienced. If so, add a purely fact-based comment with additional\ninformation and another point of evidence of it being a challenge.\n\n 2. 2.\n\nSee if any of the suggestions described there would be useful to improving the\ncase you have in mind. If so, feel free to add your thoughts in support.\n\n 3. 
3.\n\nIF a relevant type of issue is not already addressed there which doesn't\nrepresent the categorical concern you have, you can feel free to open an\nissue.\n\nThe purpose of issues in that repo is not to post grievances about specific\nissues but about to identify places where the process can be improved and ways\nwe can improve it.\n\n##\n\nContinued evolution of rules\n\n###\n\nRubric\n\nBecause wardens should be able to have clear rule expectations of contests\nthey contribute to, and because newer wardens do not have historical context\non the intent of various rules, it is important that we continue to document a\nrubric of what constitutes the subjective threshold of validity.\n\nAn initial rubric has been outlined\n[here](https://github.com/code-423n4/org/discussions/34) and a finalized\nversion of this rubric will soon be added to formal documentation and judging\nprocedure.\n\nNote well:\n\n * the purpose of this proposed rubric is not to be 'more strict'. It's to continue to work toward a standard and mutually agreed expectations as to what constitutes the base level of quality for a submission.\n\n * the scale of this rubric hasn't been used yet (someone who scored a 5 or a 10 or a 3 on some prior QA report was doing so on a band where 1 to 100 is the equivalent of 60 to 100 in the new rubric)\n\n * by the time we ask judges to implement this, we will have a Chrome extension in place that will aid them in scoring and which will have the rubric visible to them as they grade so they are aware of the implied meaning of their grade per the rubric.\n\n[PreviousSeverity Categorization](/awarding/judging-criteria/severity-\ncategorization)[Next \\- PhilosophySecurity is about\npeople](/philosophy/security-is-about-people)\n\nLast modified 4d ago\n\nOn this page\n\nFundamental principles\n\nExpectations of participants\n\nRole of hired staff (Code4 Corporation)\n\nWhat constitutes a 'valid' report'?\n\nIf you disagree with a judge's decision\n\nContinued 
evolution of rules\n\nRubric\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/frequently-asked-questions.json b/qa_bot/knowledge_base/c4/docs/frequently-asked-questions.json new file mode 100644 index 0000000..717a4d7 --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/frequently-asked-questions.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/structure/frequently-asked-questions", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# FAQ\n\n###\n\nWhat is Code4rena?\n\n\u200b[Code4rena](https://code4rena.com/) is a competitive audit platform that\nfinds more high-severity vulnerabilities, more quickly than any other auditing\nmethod. Built by a team of leading industry experts, Code4rena is designed to\nprotect your project and community by providing access to the best security\nresearchers and smart contract experts in the world.\n\n###\n\nHow is Code4rena different from other audit companies?\n\nAt Code4rena, we live by the motto \"[the more eyes on the code, the more bugs\nfound](https://www.youtube.com/watch?v=O1rKwDv5kLQ)\". 
By harnessing the power\nof the Code4rena community, projects have access to unmatched value when\nconsidering engineer hours of code review.\n\nOur platform is designed to incentivize everyone to participate in finding\nvulnerabilities. In contrast to other audit companies or bug bounty\nfacilitators, there isn't a race to find the biggest one as fast as possible.\nEveryone who puts in the effort and finds something valid will be rewarded,\nthereby increasing participation levels and the scope of vulnerabilities\nfound.\n\n###\n\nCan anyone participate?\n\nIn short, yes! Anyone can become a Code4rena Warden, and plenty of resources\nare available to learn more and earn rewards. You can find out more about this\nin our [Discord](https://discord.gg/code4rena).\n\n###\n\nWhat's the difference between Wardens and Masons?\n\nThe simplest way to define the difference between Wardens and Masons is this:\nWardens contribute to the ecosystem by auditing code and identifying\nvulnerabilities, while Masons leverage unique skills outside of auditing to\ncontribute. Examples of Mason contributions could include things like\nexplainer videos, blogs, mentorship programs etc.\n\n###\n\nHow do I sign up to be a Warden?\n\nJump into our [Discord](https://discord.gg/code4rena) and get started! From\nthere, you'll need to [register](https://code4rena.com/register) as a Warden,\nthen say hi in our #i-want-to-be-a-warden Discord channel.\n\n###\n\nHow can I become a Judge?\n\nComplete [this form](https://code4rena.com/judge-application/) and share:\nShort bio/intro and summary of relevant experience, links that help\ndemonstrate your expertise, 3 example submissions to Code4rena contests that\nwere judged high severity, description of how each submission demonstrates\nyour depth of knowledge.\n\n###\n\nHow can I become a Scout?\n\nYou can't, just yet! Right now, Scouts are hand-picked by the C4 team as it's\na highly sensitive role. 
We're looking at the possibility of opening up this\nprocess, but not in the near future.\n\n###\n\nI want Code4rena to audit my project, where do I start?\n\nIt's really simple! Just visit [this\nlink](https://code4rena.typeform.com/i-want-an-audit) and fill out the form.\nOur team will be in touch with you shortly after you've completed it.\n\n###\n\nDo you have a blog?\n\nWe do indeed, [here](https://medium.com/code-423n4). We post product updates,\nsponsor interviews and more.\n\n###\n\nWhat's the best way to stay up to date with Code4rena?\n\n\u200b[Follow us](https://twitter.com/code4rena) on Twitter and turn on\nnotifications in our #announcements channel in\n[Discord](https://discord.gg/code4rena).\n\n###\n\nWhere can I view the audit results?\n\nOnce an audit's results have been finalized, they'll be shared in our\n#announcements channel in Discord. The audit's page in the\n'[Audits](https://code4rena.com/contests)' section on our website will also be\nupdated to show results.\n\n###\n\nWhere can I read past Code4rena audit reports?\n\nWe push all public audit reports to the\n'[Reports](https://code4rena.com/reports)' section on our website. This\nenables you to read through past findings, evaluate commonalities, and\ncontinue your learning journey. Our [GitHub](https://github.com/code-423n4/)\nrepos are also public, for those of you interested in diving deeper.\n\n###\n\nHow are audits judged?\n\nWe have a lot of documents outlining the technicalities of our judging\nprocess, which you can view\n[here](https://docs.code4rena.com/roles/judges/how-to-judge-a-contest). In\nshort, Judges follow stringent criteria, whilst making sure submissions adhere\nto C4 policies. We've also implemented a severity standardization guide to\nensure a homogenized approach.\n\n##\n\nWarden FAQ\n\n###\n\nHow do I submit issues to an audit?\n\nSimply navigate to the audit page, click on the `Submit Findings` button, and\nfill out the form with your findings. 
Make sure to validate your wallet\naddress beforehand.\n\n###\n\nHow can I confirm that Code4rena has received my finding?\n\nSimply head over to the specific audit page and navigate to the `Findings`\ntab. If you see your issue listed, then it is already received by the C4 team.\n\nYou should also receive an email confirmation from [[email protected]](/cdn-\ncgi/l/email-protection) (If you don't see it in your inbox, check your spam\nfolder.)\n\n###\n\nI submitted a finding but then realized it was invalid. Do I need to contact\nCode4rena?\n\nYou can go to the `Findings` tab on the audit page and open the specific\nfinding. There you will see an option to `Withdraw` the finding.\n\n###\n\nCan I edit my findings post-submission?\n\nYes! Go to the `Findings` tab on the audit page and open the concerned\nfinding, make edits and submit changes. This option is only available until\nthe audit deadline, though.\n\n###\n\nWhat if I want to change my wallet address?\n\nYou can change your payment information at any time. Simply log in and go to\n[your account management page](https://code4rena.com/account).\n\nNote: for each audit, C4 distributes awards to the payment address on file _at\nthe time of award calculation_.\n\n###\n\nWhy was my finding marked as invalid?\n\nThe validity of an audit report submission is not based on whether it is\n'true' or not. A report may contain a finding which is factually 'true' (the\nmost literal interpretation of 'valid'), but if it does not add value or if it\nis not presented in such a way that adds value to a sponsor, it may be deemed\ninvalid by a judge.\n\n###\n\nWhy was my finding downgraded from the severity I gave it?\n\nExplaining and rationalizing the potential impact is an essential part of a\nquality submission. The burden of proof increases based on the potential value\nof the submission (rarity, severity). 
Judges will refer to the criteria on\n[this page](https://docs.code4rena.com/awarding/judging-criteria#estimating-\nrisk-tl-dr) to ascertain whether a finding matches the severity a warden has\ninitially given it.\n\n###\n\nDo Judges have the final say?\n\nThey do. Judges review findings and sponsor input, and come to their own\nindependent conclusion in alignment with our criteria. Doing this ensures the\nimpartiality of the process. You can read more about our approach to this\n[here](https://docs.code4rena.com/awarding/judging-criteria).\n\n###\n\nIs the judging criteria set in stone, or does it change?\n\nWe're an organization that aims to refine our processes wherever and whenever\npossible. At present, there is [this\npage](https://github.com/code-423n4/org/issues) on our GitHub to discuss open\nissues in regards to judging standardization.\n\n###\n\nIf I've got questions about the severity I should assign to a finding, where\nshould I go?\n\nIn the C4 Discord, these types of questions are commonly asked in #questions\nand/or #wardens.\n\n[Philosophy - PreviousIntentionally structured](/philosophy/intentionally-\nstructured)[Next \\- Other DetailsAudit timeline](/structure/our-process)\n\nLast modified 2mo ago\n\nOn this page\n\nWhat is Code4rena?\n\nHow is Code4rena different from other audit companies?\n\nCan anyone participate?\n\nWhat's the difference between Wardens and Masons?\n\nHow do I sign up to be a Warden?\n\nHow can I become a Judge?\n\nHow can I become a Scout?\n\nI want Code4rena to audit my project, where do I start?\n\nDo you have a blog?\n\nWhat's the best way to stay up to date with Code4rena?\n\nWhere can I view the audit results?\n\nWhere can I read past Code4rena audit reports?\n\nHow are audits judged?\n\nWarden FAQ\n\nHow do I submit issues to an audit?\n\nHow can I confirm that Code4rena has received my finding?\n\nI submitted a finding but then realized it was invalid. 
Do I need to contact\nCode4rena?\n\nCan I edit my findings post-submission?\n\nWhat if I want to change my wallet address?\n\nWhy was my finding marked as invalid?\n\nWhy was my finding downgraded from the severity I gave it?\n\nDo Judges have the final say?\n\nIs the judging criteria set in stone, or does it change?\n\nIf I've got questions about the severity I should assign to a finding, where\nshould I go?\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/how-to-judge-a-contest.json b/qa_bot/knowledge_base/c4/docs/how-to-judge-a-contest.json new file mode 100644 index 0000000..0e61e8b --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/how-to-judge-a-contest.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/roles/judges/how-to-judge-a-contest", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[How to judge an audit](/roles/judges/how-to-judge-a-contest)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# How to judge an audit\n\n##\n\nTimeline\n\nIdeally we would like audits to be judged in 48 hours after handoff.\n\nWe ask that you try to complete the judging process quickly so that 
we can\ndistribute awards to wardens promptly. If you need more time, please\ncommunicate that to C4 as soon as possible.\n\n##\n\nHere's how the process works leading up to judging\n\nC4 kicks off the code competition and establishes a private repo to receive\nincoming issues. Typically, most findings come in on the last day of the\naudit. When the audit ends, a Lookout will presort the repo and then it will\nbe handed to the sponsor. Sponsors will have the chance to review the\nfindings, comment, and provide feedback on issues.\n\nSponsor input is non-binding, and do note that sponsors are heavily biased\nagainst having a report that includes very many vulnerabilities. Focus your\nwork as a judge on protecting users and providing feedback to wardens.\n\n##\n\nBefore you get started\n\nRead the [Judging Criteria](https://docs.code4rena.com/roles/wardens/judging-\ncriteria), [Submission Policy](/roles/wardens/submission-policy), and review\nthe audit readme as provided by the sponsor.\n\nYou may also be interested in browsing past audits, and [reviewing open issues\nin the Rulebook repo](https://github.com/code-423n4/rulebook/issues), in order\nto see how other judges have handled issues.\n\n##\n\nReviewing submissions\n\nWhen your judge application is approved, C4 staff will contact you to invite\nyou to our Github organization and provide you with technical documentation on\nour judging tools. 
Those documents also includes all information regarding de-\nduping, grading QA/Gas and other judging tasks.\n\n##\n\nNotes on judging\n\n * Review the [Judging criteria](https://docs.code4rena.com/roles/wardens/judging-criteria).\n\n * Consider the sponsor's feedback, but keep in mind that it's not always going to be objective.\n\n * Any submissions that do not apply specifically to the functionality of the smart contract logic itself should be considered QA.\n\n * When weighing in on severity or validity of an issue, leave a comment describing your justification for any changes you make to the warden's assessment of severity.\n\n * When necessary, cross reference the submission with the codebase to validate the legitimacy of the proposed submission.\n\n * Unless there is something uniquely novel created by combining vectors, most submissions regarding vulnerabilities that are inherent to a particular system or the Ethereum network as a whole should be considered QA. Examples of such vulnerabilities include front running, sandwich attacks, and MEV. In such events, leave a comment on the issue:\n\n> \"Sandwich attacks are inherent to AMMs, so this isn't a unique issue\n> presented by the MarginSwap implementation. With this in mind, I'm\n> downgrading the risk from a proposed medium severity to QA.\"\n\nOne important caveat to all of the above: **_unless otherwise specified by the\naudit sponsor or intended to be handled by the code_** **.** For example,\nflash loans are generally unavoidable, but since MarginSwap had a safeguard\nagainst them, we considered these findings relevant in their contest.\n\n##\n\nDiscussing issues with the sponsor\n\nUltimately the judge has the final word, but we want your decisions to be\nwell-informed. 
In a typical C4 audit, there will be a few issues that benefit\nfrom discussion with the sponsor; the judge may find that their understanding\nof the system is incomplete and you need to ask for clarification, or where\nthere is room for misunderstanding. Don't hesitate to connect directly with\nthe sponsor, either in the Github comments (where you can tag them in if\nneeded), or via Discord.\n\n##\n\nIf you have questions\n\nDo not hesitate to post in the #judges Discord channel, or DM a Contest\nAdministrator with questions as you're working on judging. Any questions or\nfeedback you can add to this documentation, or comments/questions on items\nabove are highly welcome and essential for us improving our process. Thank\nyou! \ud83d\ude4f\n\n##\n\nFinal step before handing off\n\nPlease add a comment to your top scoring QA report noting where there are any\nitems that you disagreed with the severity listed and/or any items that were\ninvalid. These comments will be integrated into the final report.\n\n##\n\nWhen you're done reviewing\n\nPing a C4 Contest Administrator and let us know you're ready to hand off the\nresults for post-judge QA and then award distribution.\n\n[Roles - PreviousJudges](/roles/judges)[Next \\- RolesCertified\ncontributors](/roles/certified-contributors)\n\nLast modified 2mo ago\n\nOn this page\n\nTimeline\n\nHere's how the process works leading up to judging\n\nBefore you get started\n\nReviewing submissions\n\nNotes on judging\n\nDiscussing issues with the sponsor\n\nIf you have questions\n\nFinal step before handing off\n\nWhen you're done reviewing\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/how-we-work.json b/qa_bot/knowledge_base/c4/docs/how-we-work.json new file mode 100644 index 0000000..b1886ab --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/how-we-work.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/philosophy/how-we-work", "md_content": 
"[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# The culture we're building\n\n###\n\n **Good culture, exceptional talent, and great community don't happen by\naccident --they're thoughtfully cultivated.**\n\nOur goal is to build an organization where:\n\n###\n\nWe feel valued as individuals\n\n * We feel safe, welcome, and included in the community.\n\n * We see community members as unique people with our own strengths, weaknesses, wants, and needs\n\n * We encourage each other and express gratitude for the contributions others make.\n\n###\n\nWe work as a team\n\n * We trust each other, honor our commitments, and take responsibility.\n\n * We help each other feel safe bringing forth new ideas, and understand the right places to do that so that others who are interested feel kept in the loop.\n\n * We look for ways to reduce work by eliminating and simplifying tasks and recognizing that sometimes _fewer_ ideas are better to achieve an outcome.\n\n * We know when, where, and how we can best contribute based on the current needs and direction of the organization.\n\n###\n\nWe make 
decisions together\n\n * We focus on reaching outcomes __ together.\n\n * Everyone contributes to building the direction of the organization in an emergent way.\n\n * We can see and understand how decisions get made.\n\n * We know where to go to find out more information or get involved in an area.\n\n * We work to create and use systems that help us avoid both information overwhelm and the sense that we need to follow everything that's happening in order to not miss out contributing our thoughts to something we care about.\n\n###\n\nWe learn and grow\n\n * We stretch our capabilities, grow our talents, and help others do the same.\n\n * We are constantly finding ways to learn and improve as an organization.\n\n * We leave breadcrumbs along the way for those who follow after us in the learning we have done.\n\n * We look for opportunities to invest in the learning and growth of others.\n\n###\n\nWe do impactful work\n\n * The work we do positively impacts our industry, our community, and the individual people who our organization interacts with.\n\n * The approaches we develop and the work we do benefits other communities.\n\n * The learning we share and the opportunities we provide creates new possibilities in the lives of others.\n\n[Philosophy - PreviousSecurity is about people](/philosophy/security-is-about-\npeople)[Next \\- PhilosophyIntentionally structured](/philosophy/intentionally-\nstructured)\n\nLast modified 1yr ago\n\nOn this page\n\nGood culture, exceptional talent, and great community don't happen by accident\n--they're thoughtfully cultivated.\n\nWe feel valued as individuals\n\nWe work as a team\n\nWe make decisions together\n\nWe learn and grow\n\nWe do impactful work\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/incentive-model-and-awards.json b/qa_bot/knowledge_base/c4/docs/incentive-model-and-awards.json new file mode 100644 index 0000000..06028bc --- /dev/null +++ 
b/qa_bot/knowledge_base/c4/docs/incentive-model-and-awards.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/awarding/incentive-model-and-awards", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Awarding process](/awarding/incentive-model-and-awards/awarding-process)\n\n[Curve logic for QA and Gas optimization reports](/awarding/incentive-model-\nand-awards/curve-logic)\n\n[FAQ about QA and Gas Reports](/awarding/incentive-model-and-awards/qa-gas-\nreport-faq)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Incentive model and awards\n\nTo incentivize **wardens** , C4 uses a unique scoring system with two primary\ngoals: reward contestants for finding unique bugs and also to make the audit\nresistant to Sybil attack. 
A secondary goal of the scoring system is to\nencourage contestants to form teams and collaborate.\n\n **Judges** are incentivized to review findings and decide their severity,\nvalidity, and quality by receiving a share of the prize pool themselves.\n\n##\n\nHigh and Medium Risk bugs\n\nContestants are given shares for bugs discovered based on severity, and those\nshares give the owner a pro rata piece of the pot:\n\n`Med Risk Shares: 3 * (0.9 ^ (findingCount - 1)) / findingCount` `High Risk\nShares: 10 * (0.9 ^ (findingCount - 1)) / findingCount`\n\nFindingCount represents the number of findings for a same specific bug. Please\nnote that findings with partial credit as still count as 1 finding in the\nalgorithm\n\nDuring awarding, each share is redeemed for: `pot / number of shares`.\n\n###\n\nBonus for best / selected for report\n\nFor each unique High or Medium finding, the submission selected for inclusion\nin the audit report receives a 30% share bonus.\n\n###\n\nDuplicates getting partial credit\n\nAll issues which identify the same functional vulnerability will be considered\nduplicates regardless of effective rationalization of severity or exploit\npath.\n\nHowever, any submissions which do not identify or effectively rationalize the\ntop identified severity case may be judged as \"partial credit\" and may have\ntheir shares in that finding's pie divided by 2 or 4 at judge's sole\ndiscretion (e.g. 
50% or 25% of the shares of a satisfactory submission in the\nduplicate set).\n\n##\n\nBot races\n\nThe first hour of each Code4rena audit is devoted to a bot race, to\nincentivize high quality automated findings as the first wave of the audit.\n\n * The winning bot report is selected and shared with all wardens within 24 hours of the audit start time.\n\n * The full set of issues identified by the best automated tools are considered out of scope for the audit and ineligible for awards.\n\nDoing this eliminates the enormous overlapping effort of all wardens needing\nto document common low-hanging issues And because the best bot report is\nshared with auditors at the start of the audit, these findings serve as a\nthorough starting place for understanding the codebase and where weaknesses\nmay exist.\n\n **Ultimately, the bot race ensures human auditors are focused on things\nhumans can do.**\n\nBy designating a portion of the pool in this direction, Code4rena creates a\nseparate lane for the significant investment of effort that many auditors\nalready make in automated tooling -- and rather than awarding 100 people for\nidentifying the same issue, we award the best automated tools.\n\n##\n\nAnalyses\n\nEach warden is encouraged to submit an Analysis alongside their findings for\neach audit, to share high-level advice and insights from their review of the\ncode.\n\nWhere individual findings are the \"trees\" in an audit, the Analysis is a\n\"forest\"-level view.\n\nAdvanced-level Analyses compete for a portion of each audit's award pool, and\nare graded and awarded similarly to QA and Gas Optimization reports.\n\n##\n\nQA and Gas Optimization Reports\n\nIn order to incentivize wardens to focus efforts on high and medium severity\nfindings while also ensuring quality coverage, the pool's allocation is capped\nfor low severity, non-critical, and gas optimization findings.\n\nLow and non-critical findings are submitted as a **single** QA report.\nSimilarly, gas 
optimizations are submitted as a single gas report. For more on\nreports, see [Judging criteria](/awarding/judging-criteria).\n\nQA and gas optimization reports are awarded on a curve based on the judge's\nscore.\n\n * QA reports compete for a share of 2.5% of the prize pool (e.g. $1,250 for a $50,000 audit);\n\n * The gas optimization pool varies from audit to audit, but is typically 2.5% of the total prize pool (e.g. $1,250 for a $50,000 audit);\n\n * QA and Gas optimization reports are scored by judges using A/B/C grades (with C = unsatisfactory), and awarded on a curve.\n\nThere is a very high burden of quality and value provided for QA and gas\noptimization reports. Only submissions that demonstrate full effort worthy of\nconsideration for inclusion in the report will be eligible for rewards.\n\nIt is highly recommended to clearly spell out the impact of proposed gas\noptimizations.\n\nHistorically, Code4rena valued non-critical findings at 0; the intent of the\nQA report is not to increase the value of non-criticals, but rather to allow\nthem to be consolidated in reports alongside low severity issues.\n\n **Note:** Audits pre-dating February 3, 2022 awarded low risk and gas\noptimization shares as: `Low Risk Shares: 1 * (0.9 ^ (findingCount - 1)) /\nfindingCount`\n\n##\n\nGrades for Analyses, QA and Gas reports\n\nAnalyses, QA reports and Gas reports are graded A, B, or C.\n\nC scores are unsatisfactory and ineligible for awards.\n\nAll A-grade reports receive a score of 2; All B-grade reports get a 1.\nAwarding for QA and Gas reports is on a curve that's described\n[here](https://docs.code4rena.com/awarding/incentive-model-and-awards/curve-\nlogic).\n\n###\n\nBonus for best / selected for report\n\nJudges choose the best report in each category (Analysis, QA report, and Gas\nreport), each of which earns the same 30% share bonus described under \"High\nand Medium Risk bugs.\"\n\n **Note:** if the `selected for report` submission has a B-grade label, it\nwill 
still be treated as A-grade and given proportionally more than B-grade,\nplus the 30% bonus for being `selected for report`.\n\n##\n\nSatisfactory / unsatisfactory submissions\n\nAny submissions deemed unsatisfactory are ineligible for awards.\n\nThe bar for satisfactory submissions is that they are roughly at a level that\ncould be found in a draft report by a professional auditor: specifically on\nthe merits of technical substance, with writing quality considered only where\nit interferes with comprehension of the technical message.\n\nIt is possible for a submission to be _technically_ valid and still\nunsatisfactory. An \"unsatisfactory\" submission may meet any of these criteria:\n\n * incorrect\n\n * low/incomplete effort\n\n * out of scope\n\n * clearly overinflated severity\n\n * proof of concept does not pass the burden of proof test\n\n * approach is disrespectful of sponsors' and judges' time in some way\n\nAny submissions that appear to be direct copies of other reports in the\ncurrent audit will be collectively deemed unsatisfactory.\n\n[PreviousLookouts](/roles/certified-contributors/lookouts)[NextAwarding\nprocess](/awarding/incentive-model-and-awards/awarding-process)\n\nLast modified 10d ago\n\nOn this page\n\nHigh and Medium Risk bugs\n\nBonus for best / selected for report\n\nDuplicates getting partial credit\n\nBot races\n\nAnalyses\n\nQA and Gas Optimization Reports\n\nGrades for Analyses, QA and Gas reports\n\nBonus for best / selected for report\n\nSatisfactory / unsatisfactory submissions\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/index.json b/qa_bot/knowledge_base/c4/docs/index.json new file mode 100644 index 0000000..bc6774b --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/index.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/", "md_content": 
"[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Code4rena\n\nCommunity-driven competitions for smart contract audits\n\nThe players in the arena:\n\n * \u200b[ **Wardens**](/roles/wardens) protect the DeFi ecosystem from threats by auditing code.\n\n * \u200b[ **Sponsors**](/roles/sponsors) create prize pools to attract wardens to audit their project.\n\n * \u200b[ **Judges**](/roles/judges) decide the severity, validity, and quality of findings and rate the performance of wardens.\n\nC4 audits are different from both bug bounties and traditional audits.\n\n###\n\nBug bounties vs C4 audits\n\nBug bounties\n\n|\n\nCompetitive audits \n \n---|--- \n \n **Spec work.** No way to have confidence that the time invested will produce\na payout.\n\n|\n\n **Guaranteed payouts.** Auditors know it's highly likely they can find a bug\nthat will make it worth their time. \n \n **Dark forest.** Who knows how much competition there is right now? 
Or how\nmature the codebase is?\n\n|\n\n **Low-hanging fruit.** If a project is seeking an audit, it's likely fresh\ncode with clear opportunities to dig in. \n \n **Grow on your own.** Researchers have to proactively look for ways to learn\nand level up their skills.\n\n|\n\n **Learning community.** Open, competitive audits let auditors compare\neveryone's findings and learn new things every single week. \n \n **Paradox of choice.** So many projects have bounties. How does an auditor\nchoose which to focus on?\n\n|\n\n **Less FOMO.** C4 runs a handful of active audits at a time and wardens can\nRSVP to signal to each other which audits have more participants. \n \n###\n\nTraditional audits vs C4 audits\n\nTraditional audits\n\n|\n\nC4 audits \n \n---|--- \n \n **Constrained time.** If you want a quality audit from a top firm, you're\ngoing to have to wait.\n\n|\n\n **Time flexible.** Code audits can be put together quickly for teams eager to\ngo to market. \n \n **Constrained cost.** Audit firms must recruit and retain talent, and\ndefensively maintain their brand.\n\n|\n\n **Flexible cost.** C4 scales to meet demand. Sponsors can increase pot size\nto attract more attention. \n \n **Constrained diversity.** Audit firm staff have to work to stay ahead of\nDeFi's complex and expanding attack surface.\n\n|\n\n **Diverse capability.** C4 audits allow specialized security researchers to\ndemonstrate their skill and creativity. \n \n **Systematic.** Firms use set processes for evaluating code, which differs\nfrom the way attackers approach things.\n\n|\n\n **Rigorous.** C4 wardens are incentivized to work creatively to find as many\nrare, high risk vulnerabilities as possible. 
\n \n##\n\nIncentive model and awards\n\nDetails on Code4rena's incentive model and awards can now be found\n[here](/awarding/incentive-model-and-awards).\n\n[Next \\- RolesWardens](/roles/wardens)\n\nLast modified 2mo ago\n\nOn this page\n\nBug bounties vs C4 audits\n\nTraditional audits vs C4 audits\n\nIncentive model and awards\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/intentionally-structured.json b/qa_bot/knowledge_base/c4/docs/intentionally-structured.json new file mode 100644 index 0000000..5c38d90 --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/intentionally-structured.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/philosophy/intentionally-structured", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Intentionally structured\n\n###\n\nWe are an intentionally structured open organization\n\nWe believe that a challenge as big as making DeFi more secure requires an\noutstanding community.\n\nBy creating clear, understandable structure to our organization, we empower\nmore people to feel welcome to get involved and play 
their part in building\nthe best community-driven organization we can.\n\n###\n\nWhy structure matters\n\nOpen organizations are not 'flat', as much as we might say they are, or even\nthough we might like them to be.\n\nAs [explored in detail by Jo\nFreeman](https://www.jofreeman.com/joreen/tyranny.htm), the absence of a\ndefined structure leads organizations to end up with hidden hierarchy and\nunwritten rules of 'how things really get done', which can be ultimately\ndisempowering and not very conducive to organizational clarity and\neffectiveness.\n\nIf everyone has a seat at the table, decisions end up being made at a table\nnot everyone can see. That's not malicious or deceitful, it's human nature and\nit's actually wise. Because of our innate understanding that new ideas need\nprotection and candid feedback is best given with a thoughtful approach,\npeople seek environments they trust where they can share ideas vulnerably and\nseek candid feedback without the need to feel defensive, fear they'll look\nstupid, or risk conflict.\n\nIn the absence of formal channels and approaches for this in any organization,\npeople will use informal channels, so failing to account for this reality\nactually exacerbates the tendency for open organizations to have hidden\nhierarchy.\n\nBased on this, it is our intent to spell out very clearly how things work and\nincreasingly work to hold ourselves accountable to work within our process.\n\n###\n\nLow stress, on schedule\n\nUnlike many other DAOs, C4 offers a service on a fixed\n[timeline](/structure/our-process), so we need an approach that helps us\nreliably execute on schedule.\n\nAs the organization evolves, we will take more and more of an 'open swarm'\napproach to our urgent tasks, but in the near-term we rely on a group of paid\nstaff who keep things moving forward every day so the rest of the community\ncan contribute where and how they see best.\n\nThe goal of this phase is to so thoroughly and completely document 
and\nsystematize our processes that we are able to consistently execute an\nincreasingly complex process with reliability and low stress.\n\n###\n\nTools and processes to shape the river\n\nInformation and decisions flow at different speeds, with each requiring\ndifferent context, granularity, and presentation.\n\nThe tools we use and the ways we use them tend to set the pace and flow of the\nriver of activity. While people often want to have one tool to rule them all,\na mixture of tools is essential for capturing information, reducing cognitive\nload and creating emotional safety while blending urgent vs. emergent, 1ft vs.\n30,000ft, process vs. strategy, task vs possibility.\n\n###\n\n\u200b\n\n[Philosophy - PreviousThe culture we're building](/philosophy/how-we-\nwork)[Next \\- Other DetailsFAQ](/structure/frequently-asked-questions)\n\nLast modified 1yr ago\n\nOn this page\n\nWe are an intentionally structured open organization\n\nWhy structure matters\n\nLow stress, on schedule\n\nTools and processes to shape the river\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/judges.json b/qa_bot/knowledge_base/c4/docs/judges.json new file mode 100644 index 0000000..a97b558 --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/judges.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/roles/judges", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[How to judge an audit](/roles/judges/how-to-judge-a-contest)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're 
building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Judges\n\nJudges decide the severity, validity, and quality of findings and rate the\nperformance of wardens.\n\nJudging is a central part of the C4 competitive audit model.\n\nIn order to align wardens with sponsors, and remove the burden from each\nplayer in the arena from determining the ultimate severity of findings, C4\nselects an impartial judge for each audit.\n\nJudges review warden findings and sponsor input, and come to their own\nindependent conclusion. Doing this enables C4 to determine awards for wardens\nand categorize findings for audit reports.\n\nIn order to ensure the impartiality of the process, the C4 community gives\njudges final authority to determine the severity of findings. Judges receive\ncompensation for their work based on a percentage of the audit pool.\n\n##\n\nBecoming a judge\n\nJudging is an important and elite Code4rena role. It also provides a\nguaranteed payout from each audit.\n\nIn order to ensure accountability for impartiality, the C4 community taps\njudges who are typically established, visible members of the DeFi community\nwhich come based on endorsements from someone who has judged at least two\naudits.\n\nWe also have a process for vetting judge applications from within the C4\ncommunity.\n\n###\n\nCommunity judges\n\nCommunity judges are those who emerge self-selected from the C4 community. 
We\nare thrilled to have folks interested in playing this role.\n\nIn order to ensure the impartiality and depth of knowledge we aim for among\njudges, we have some additional diligence required for self-selecting judges.\n\n####\n\nMinimum criteria (subject to change)\n\n * Compete in at least 3 Code4rena audits\n\n * Find at least 3 high severity bugs\n\n * Must be a [Certified C4 contributor](/roles/certified-contributors) in good standing\n\n####\n\nNon-technical criteria\n\n * **Sense of fairness** --i.e. evidence suggests you don't show favoritism, but instead aim for a fair competition where quality is rewarded.\n\n * **Discernment** --ability to impartially a) assess the logic behind a vulnerability, b) verify its validity, and c) determine its severity, given the specifics outlined by the warden as well as the broader context of the code.\n\n * **Clear written communication** --your English does not need to be perfect, but you should be able to engage in nuanced discussions with wardens and sponsors via written text.\n\n###\n\nHow to apply\n\nComplete [this form](https://code4rena.com/judge-application/) and share:\n\n * Short bio/intro and summary of relevant experience\n\n * Links that help demonstrate your expertise\n\n * 3 example submissions to Code4rena audits that were judged high severity\n\n * Description of how each submission demonstrates your depth of knowledge\n\n **Note:** judge applications are reviewed during a one-week period each\nmonth. Notices of application and review windows will be posted in the C4\nDiscord server.\n\n###\n\nJudge selection process\n\nBeing a judge is a critical role and we only have so many spots.\n\nJudge applications are reviewed monthly by the C4 judge selection committee,\nwhich includes top leaderboard wardens and past judges. 
The committee will\nreview your application and give you a \"yes\" or \"not yet\".\n\n###\n\nIs it possible for a warden who competed in an audit to judge that same audit?\n\nYes--but in the interest of impartiality, they must forgo any awards they\nwould have received for their findings in said audit.\n\n[PreviousAudit process](/roles/sponsors/contest-process)[NextHow to judge an\naudit](/roles/judges/how-to-judge-a-contest)\n\nLast modified 2mo ago\n\nOn this page\n\nBecoming a judge\n\nCommunity judges\n\nHow to apply\n\nJudge selection process\n\nIs it possible for a warden who competed in an audit to judge that same audit?\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/judging-criteria.json b/qa_bot/knowledge_base/c4/docs/judging-criteria.json new file mode 100644 index 0000000..0d1c6fb --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/judging-criteria.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/awarding/judging-criteria", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Severity Categorization](/awarding/judging-criteria/severity-categorization)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered 
By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Judging criteria\n\n###\n\nSubmission Review Process\n\nC4 strives to ensure a deliberate and transparent process for reviewing and\njudging submissions.\n\nAt the end of a given audit period, all reports will be reviewed and\ncategorized based on these criteria. Pending sponsor review, final reports\nwill be shared publicly on the [C4 Audit Report\npage](https://code4rena.com/reports). Audit results are shared on the C4\nDiscord and winners announced on the [C4\nTwitter](https://twitter.com/code423n4).\n\nReports are also judged based on grammar, conciseness, and formatting.\n\n###\n\nBest Current Practices\n\nThe [Code4rena org repo](https://github.com/code-423n4/org) documents open\ndiscussions and emergent best practices for judging C4 audits. Judges are\nencouraged to review [open issues in that\nrepo](https://github.com/code-423n4/org/issues) regularly.\n\n###\n\nDuplicate Submissions\n\nShould multiple submissions describing the same vulnerability be submitted,\nJudges have the discretion to place these bugs into the same bucket, in which\ncase, the award will be shared among those who submitted. However, multiple\nsubmissions from the same warden (or warden team), are treated as one by the\nawarding algorithm and do not split the pie into smaller pieces.\n\n###\n\nScope\n\nEach audit may include code that is explicitly in scope and out of scope, and\nspecific issues which also may be identified as out of scope.\n\nWardens who adhere to the audit guidelines and report valid low/medium/high\nseverity bugs which are not explicitly excluded from scope will earn a\nguaranteed payment.\n\nWardens _may_ elect to argue to bring things into scope--either by making the\ncase that an issue poses a more urgent threat than identified or by submitting\na medium or high severity finding in code which is out of scope. 
However, it\nis up to judges' absolute discretion whether to include these findings and\naward them, and these issues should include a clear argument as to why the\nitems merit being brought into scope.\n\nIn the interest of everyone's time, **please do not offer QA or gas reports on\nany code or known issues which are identified as out of scope.**\n\n###\n\nScoring\n\nThe scoring system has three primary goals:\n\n * Rewarding Wardens for finding unique bugs\n\n * Hardening C4 code audits to Sybil attacks\n\n * Encouraging coordination by incentivizing Wardens to form teams.\n\n###\n\nAnalysis\n\nAn analysis is a written submission outlining:\n\n * Wardens' analysis of the codebase as a whole and any observations or advice they have about architecture, mechanism, or approach\n\n * Broader concerns like systemic risks or centralization risks\n\n * The approach taken in reviewing the code\n\n * New insights and learnings from the audit\n\nIf individual findings are trees, Analyses are the forest. They provide\nwardens with an opportunity to contribute value through high level insights\nand advice that aren't necessarily covered by specific bugs -- and a way to\nget credit for doing so.\n\nAnalyses are judged A/B/C, with the top Analysis selected for inclusion in the\naudit report, similarly to Gas and QA reports.\n\n###\n\nQA reports (low/non-critical)\n\nLow and non-critical findings must be submitted as a _single_ QA report per\nwarden. We allocate a **fixed 2.5% of prize pools toward QA reports.**\n\nYour QA report should include:\n\n * all low severity findings; and\n\n * all non-critical findings.\n\nEach QA report will be assessed based on report quality and thoroughness as\ncompared with other reports, with awards distributed on a curve. 
The top QA\nreport author will receive the top prize from the category.\n\nWardens overstating the severity of QA issues (submitting low/non-critical\nissues as med/high in order to angle for higher payouts) will have their\nscores reduced by judges.\n\nIn the unlikely event that zero high- or medium-risk vulnerabilities are\nfound, the full pool will be divided based on the QA Report curve.\n\n###\n\nGas reports\n\nGas reports should be submitted using the **same approach as the QA reports:**\na single submission per warden which includes all identified optimizations.\nThe gas pool will be allocated on a curve, and the top reporter will receive\nthe top prize in the category.\n\nThe gas pool varies from audit to audit, but typically it consists of 2.5% of\nthe total prize pool. The precise gas pool for each audit can be found in that\naudit's repo.\n\n##\n\nEstimating Risk\n\nSee [Severity Categorization](https://docs.code4rena.com/awarding/judging-\ncriteria/severity-categorization).\n\n[PreviousFAQ about QA and Gas Reports](/awarding/incentive-model-and-\nawards/qa-gas-report-faq)[NextSeverity Categorization](/awarding/judging-\ncriteria/severity-categorization)\n\nLast modified 10d ago\n\nOn this page\n\nSubmission Review Process\n\nBest Current Practices\n\nDuplicate Submissions\n\nScope\n\nScoring\n\nAnalysis\n\nQA reports (low/non-critical)\n\nGas reports\n\nEstimating Risk\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/lookouts.json b/qa_bot/knowledge_base/c4/docs/lookouts.json new file mode 100644 index 0000000..17c8ca8 --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/lookouts.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/roles/certified-contributors/lookouts", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified 
contributors](/roles/certified-contributors)\n\n[+Backstage wardens](/roles/certified-contributors/backstage-wardens)\n\n[Lookouts](/roles/certified-contributors/lookouts)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find \u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Lookouts\n\n##\n\n **Lookouts**\n\nLookouts review and organize submissions to Code4rena's competitions, focusing\non a) lightening and clarifying the project team's workload, and b) preparing\nthe repo for judging.\n\nLookouts receive a guaranteed payout from each audit they work on. In the\ninterest of impartiality, they must forgo any awards they would have received\nfor their own submissions as wardens. Therefore, the role is typically\nassigned prior to the competition's start.\n\n##\n\n **Becoming a Lookout**\n\nTo become a Lookout, you may be nominated by a Judge or Lookout in good\nstanding, or nominate yourself.\n\n####\n\n **Minimum criteria**\n\n * Compete in at least 3 Code4rena audits;\n\n * Be a [Certified C4 contributor](/roles/certified-contributors) in good standing;\n\n * Find at least 1 high severity finding OR 3 medium severity findings OR score A on at least 3 QA or Gas reports;\n\n####\n\n **Non-technical criteria**\n\n * **Sense of fairness** --i.e. 
evidence suggests you don't show favoritism, but instead aim for a fair competition where quality is rewarded.\n\n * **Clear written communication** --your English does not need to be perfect, but you should be able to engage in technical discussions with judges and sponsors via written English.\n\n##\n\nHow to apply\n\nComplete [this application form](https://code4rena.com/lookout-application/)\nand share:\n\n * Short bio/intro and summary of relevant experience\n\n * Links that help demonstrate your expertise\n\n * 3 example submissions to Code4rena audits that you're especially proud of\n\n * Description of how each submission demonstrates your depth of knowledge\n\n **Note:** Lookout applications are reviewed during a one-week period each\nmonth. Notices of application and review windows will be posted in the C4\nDiscord server.\n\n **Lookout selection process**\n\nBeing a lookout is a critical role and we only have so many spots.\n\nLookout applications are reviewed monthly by a group of C4 judges and\nlookouts. 
The group will review your application and give you a \"yes\" or \"not\nyet\".\n\n[Previous+Backstage wardens](/roles/certified-contributors/backstage-\nwardens)[Next \\- AwardingIncentive model and awards](/awarding/incentive-\nmodel-and-awards)\n\nLast modified 2mo ago\n\nOn this page\n\nLookouts\n\nBecoming a Lookout\n\nHow to apply\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/our-process.json b/qa_bot/knowledge_base/c4/docs/our-process.json new file mode 100644 index 0000000..c3a6b79 --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/our-process.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/structure/our-process", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Audit timeline\n\nThis is a high level overview of the C4 audit timeline. We have an\nincreasingly detailed operational manual in Notion.\n\nWe are working on tightening up all of our processes in order to be able to\ndistribute awards and publish reports more quickly. 
Here's our estimated\ntimeline:\n\n| Text | Ideal | Actual (on average) |\n|---|---|---|\n| Audit submissions close | Day 1 | Day 1 |\n| Lookout pre-sorts findings (de-duping and triage) | Day 7 | Day 7 |\n| Sponsors review and give feedback on findings | Day 9 | Day 14 |\n| Judges determine final severity | Day 12 | Day 21-30 |\n| Judging QA complete; awards announced | Day 15 | Day 25-34 |\n| Awards are distributed; Sponsors complete mitigation of any issues | Day 15 | Day 32-44 |\n| Audit report is published; Audit issues are made public | Day 21 | Day 42-60 |\n\nPresently, the process takes us longer than the ideal, but we are actively\nworking to improve our planning and processes that will smooth out the rough\nspots that slow things down.\n\nSee also: [Awarding process](https://docs.code4rena.com/awarding/incentive-\nmodel-and-awards/awarding-process)\u200b\n\n[Other Details - PreviousFAQ](/structure/frequently-asked-questions)[Next \\-\nOther DetailsWhere can I find\u2026?](/structure/where-can-i-find...)\n\nLast modified 20d ago\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/qa-gas-report-faq.json b/qa_bot/knowledge_base/c4/docs/qa-gas-report-faq.json new file mode 100644 index 0000000..18d5d5f --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/qa-gas-report-faq.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/awarding/incentive-model-and-awards/qa-gas-report-faq", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Awarding process](/awarding/incentive-model-and-awards/awarding-process)\n\n[Curve logic for QA and Gas optimization 
reports](/awarding/incentive-model-\nand-awards/curve-logic)\n\n[FAQ about QA and Gas Reports](/awarding/incentive-model-and-awards/qa-gas-\nreport-faq)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# FAQ about QA and Gas Reports\n\nThis FAQ pertains to the award mechanism update that takes effect February 3,\n2022, which changes the submission guidelines for low-risk, non-critical, and\ngas optimization reports. For more details, see [Judging\nCriteria](https://docs.code4rena.com/roles/wardens/judging-criteria).\n\n###\n\nWhat happens to the award pool if no Med/High vulns are found?\n\nThe full pool would then be divided based on the QA Report curve.\n\n###\n\nWill non-critical findings hold some weight? Just want to know if it's worth\nspending a considerable amount of time writing this part of the report.\n\nThe full QA report will be graded on a curve against the other reports. We'll\nbe experimenting together as a community with this, but we think we'll learn a\nlot and it will be interesting to see the best practices emerge.\n\nWe are intentionally not providing an \"example,\" as we are eager to see what\napproaches folks take and to be able to learn from a variety of approaches.\n\n###\n\nWhat if a low-impact QA report turns out to be a high-impact report? How does\nthat work with the 10% prize pool? 
Would the report be upgraded?\n\nIt's conceivable it could be upgraded, though it's important to consider that\npart of auditing is demonstrating proper theory of how an issue could be\nexploited. If a warden notices something is \"off\" but is unable to articulate\nwhy it could lead to loss of funds, for example, the job is only half-done;\nwithout understanding the implications, a developer could very well overlook\nor deprioritize the issue.\n\nThe tl;dr for determining severity is relatively clear with regard to\nseparating by impact.\n\n###\n\nWhat happens when an issue submitted by the warden as part of their QA report\n(an L or N) _DOES_ get bumped up to Med/High by the judge after review?\n\nIf it seemed appropriate to do so based on a judge's assessment of the issue,\nthey could certainly choose to do this.\n\nThe judge could create a new separate Github issue in the findings repo that\ncontains the relevant portions of the warden's QA report, and add that to the\nrespective H or M level bucket.\n\nHowever, QA items may be marked as a duplicate of another finding _without_\nbeing granted an upgrade, since making the case for _how_ an issue can be\nexploited, and providing a thorough description and proof of concept, is part\nof what merits a finding properly earning medium or high severity.\n\n###\n\nConversely, in the reverse situation where an issue submitted by wardens as\nH/M level, is subsequently downgraded to QA level by the judge during their\nreview, would the penalty just be excluding the overrated warden submission\nfrom consideration in regards to the QA rewards?\n\nWe'll need to see how it works in reality, but our current assumption is that\n(a) low severity findings attempted to get pushed into med/high would\nessentially get zero (just logically so since they wouldn't be high or med),\nand then (b) their QA report would be lower quality as a result, and so they\nwouldn't score as highly as they could have. 
Judges could also decide to mark\noff points in someone's QA report if they saw behavior that seemed like it\nmight be trying to game for higher rewards by inflating severity, so it could\nhave a negative consequence as well.\n\n[PreviousCurve logic for QA and Gas optimization reports](/awarding/incentive-\nmodel-and-awards/curve-logic)[Next \\- AwardingJudging\ncriteria](/awarding/judging-criteria)\n\nLast modified 1yr ago\n\nOn this page\n\nWhat happens to the award pool if no Med/High vulns are found?\n\nWill non-critical findings hold some weight? Just want to know if it's worth\nspending a considerable amount of time writing this part of the report.\n\nWhat if a low-impact QA report turns out to be a high-impact report? How does\nthat work with the 10% prize pool? Would the report be upgraded?\n\nWhat happens when an issue submitted by the warden as part of their QA report\n(an L or N) DOES get bumped up to Med/High by the judge after review?\n\nConversely, in the reverse situation where an issue submitted by wardens as\nH/M level, is subsequently downgraded to QA level by the judge during their\nreview, would the penalty just be excluding the overrated warden submission\nfrom consideration in regards to the QA rewards?\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/security-is-about-people.json b/qa_bot/knowledge_base/c4/docs/security-is-about-people.json new file mode 100644 index 0000000..c3a6c27 --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/security-is-about-people.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/philosophy/security-is-about-people", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging 
criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Security is about people\n\nWe tend to think of software as a cold, mechanical, logical thing.\n\nBut _people_ write code. Emotions and the way we handle them impact the\nsecurity of a project as much or perhaps more than any other contributing\nfactor.\n\nThough almost no one will admit it, everyone's carrying around a lot of\nemotion when it comes to security. These feelings strongly affect the way\npeople approach security even though most people never show them.\n\nLike software vulnerabilities, there are no flags waving and red lights\nblaring. These feelings are buried deep and never raise their head above the\nsurface--but they're ever present.\n\n###\n\nFear and shame drive security\n\n * Developers fear they'll be publicly embarrassed by a hack.\n\n * Auditors worry they'll miss something that gets exploited.\n\nIn the world of DeFi, the emotional intensity of this fear is even greater. A\nhigh-profile hack could mean not only friends losing money, but also getting\npiled on by the mob of speculators wanting token prices to go \"up only\".\n\nThese fears lead people to approach security in a way that is less productive\nand effective than it could be, leading projects to focus on security theater,\nimage management, and trying to turn audits into proof of a project's security\nso that blame can be swiftly assigned.\n\nBut security doesn't work that way. 
Top audit teams and well respected\ndevelopers have had their contracts exploited. Security is a process not a\ndestination, and that's even more true in a bleeding edge domain like DeFi.\n\n###\n\nWe can do better\n\nWe aim to reduce the fear and shame driven approach and replace it with a\nhealthier one, which starts with recognizing what an emotionally challenging\ntask it is to secure code.\n\n> _It is not the critic who counts; not the one who points out... where the\n> doer of deeds could have done them better._ **_The credit belongs to the\n> one who is actually in the arena_** _... who errs, who comes short again and\n> again, because there is no effort without error and shortcoming._ --Teddy\n> Roosevelt\n\nWe think of _everyone_--auditors, developers--as playing their role together\nin the arena, with the ultimate goal being to level up smart contract security\nwhile reducing the pressure and burden on individual people.\n\nWhile we use a competitive arena to add a dimension of fun and an incentive to\nstrive for the best work we can each uniquely contribute, we strongly believe\n**security is a collaborative, community effort.**\n\n##\n\nSecurity is about people\n\nFear and shame are powerful motivators, but they are unsustainable,\nshort-sighted, and redirect attention to appearance rather than substance.\n\nIt may not be \"logical\" for humans to act in emotional ways, but humans are\nindeed emotional beings. Rather than arguing against reality, we should\nacknowledge this and work with it. As one of C4's bootstrappers, Scott Lewis,\nput it:\n\n> \"The way all human brains work is not silly. 
We should change technology for\n> humans, not change humans for technology.\"\n\nWe believe the same is true for both our approach to [building C4 as an\norganization](/philosophy/how-we-work) and the way we view security.\n\n###\n\nGetting an audit is courageous\n\nAn audit is an investment in a better long-term outcome for both the project\nundergoing the audit as well as its users and the community as a whole.\n\nIt requires tremendous courage to ask someone to look closely and find as many\nplaces where code could be improved.\n\nBecause of this, we treat sponsor projects with respect. We consider every\nfinding discovered in an audit contest as a tool that we can use in the future\nto help others learn and better understand smart contract vulnerabilities.\n\n###\n\nBoth wardens and sponsors have a voice in the process\n\nWe take sponsors' feelings into consideration, working to help find ways to\ncommunicate and disclose the results of audits in a way that works as best as\nwe can to eliminate the \"blame and shame\" approach and replace it with one\nthat honors sponsors' contribution toward making their project and the DeFi\nspace more secure for everyone.\n\nOur contest process is transparent, with all issues reviewable on GitHub. We\nput an impartial judge in the position of listening to all players' positions\non a given issue and making a final determination.\n\n###\n\nWe are a community\n\nWe trust that a community-driven approach with valuable incentives ensures\nenough coverage to give sponsors a meaningful and valuable audit without\nputting the burden on any one person to do a 'good enough job' catching every\nbug--a burden that burns out traditional auditors.\n\nOur community chose the wolf as our symbol, and it's fitting. 
Wolves are\nhighly collaborative creatures who hunt in packs.\n\nRather than putting the pressure on individual auditors to catch all the\nvulnerabilities in a project, we ask competitors to catch the bugs that they\ncan, and trust that the community will show up and contribute.\n\n[Awarding - PreviousFairness and validity](/awarding/fairness-and-\nvalidity)[Next \\- PhilosophyThe culture we're building](/philosophy/how-we-\nwork)\n\nLast modified 1yr ago\n\nOn this page\n\nFear and shame drive security\n\nWe can do better\n\nSecurity is about people\n\nGetting an audit is courageous\n\nBoth wardens and sponsors have a voice in the process\n\nWe are a community\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/severity-categorization.json b/qa_bot/knowledge_base/c4/docs/severity-categorization.json new file mode 100644 index 0000000..c45fb4d --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/severity-categorization.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/awarding/judging-criteria/severity-categorization", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Severity Categorization](/awarding/judging-criteria/severity-categorization)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered 
By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Severity Categorization\n\n###\n\nEstimating Risk\n\nWhere **assets** refer to funds, NFTs, data, authorization, and any\ninformation intended to be private or confidential:\n\n * **QA (Quality Assurance)** Includes both **Non-critical** (code style, clarity, syntax, versioning, off-chain monitoring (events, etc.)) and **Low risk** (e.g. assets are not at risk: state handling, function incorrect as to spec, issues with comments). Excludes Gas optimizations, which are submitted and judged separately.\n\n * **2 -- Med:** Assets not at direct risk, but the function of the protocol or its availability could be impacted, or leak value with a hypothetical attack path with stated assumptions, but external requirements.\n\n * **3 -- High:** Assets can be stolen/lost/compromised directly (or indirectly if there is a valid attack path that does not have hand-wavy hypotheticals).\n\n###\n\nSeverity Standardization Process\n\nJudges and the C4 community collaborate in open discussions of severity\nstandards, which has created an evolving meta that's unique to C4 and enables\nboth the organizations being audited and the auditors themselves to be part of\na platform that is self-reflective and is constantly iterating on its\nprocesses for their collective benefit.\n\nThe rules above act as a starting point, and these open discussions act as a\ngrowing set of case law examples. 
You can view the open forum where these\ndiscussions are held\n[here](https://github.com/code-423n4/org/issues?q=is%3Aissue+is%3Aopen+label%3Arules).\n\n[Awarding - PreviousJudging criteria](/awarding/judging-criteria)[Next \\-\nAwardingFairness and validity](/awarding/fairness-and-validity)\n\nLast modified 6mo ago\n\nOn this page\n\nEstimating Risk\n\nSeverity Standardization Process\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/solo-audits.json b/qa_bot/knowledge_base/c4/docs/solo-audits.json new file mode 100644 index 0000000..f6cd94a --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/solo-audits.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/roles/wardens/solo-audits", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Submission policy](/roles/wardens/submission-policy)\n\n[Solo audits](/roles/wardens/solo-audits)\n\n[Authenticate with your wallet](/roles/wardens/warden-auth)\n\n[Tools and resources](/roles/wardens/tools-and-resources)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Solo audits\n\n\u200b[Certified 
Wardens](https://docs.code4rena.com/roles/certified-contributors)\nhave the opportunity to book solo audits through Code4rena, via the \"Available\nfor hire\" function in their website profiles. These audits are performed by\none warden who reviews the sponsor's code and produces a report. ([Here's an\nexample](https://code4rena.com/reports/2022-07-canto/) of a solo audit\nperformed by a Code4rena warden.)\n\n * The process of booking a solo audit begins with a project team member clicking the \"Get a quote\" button on a warden's profile, and sharing scoping details with the Code4rena team.\n\n * Code4rena staff will then consult with the warden and project team to firm up scoping, pricing, and dates.\n\n##\n\nBenefits to projects and wardens\n\n * When a solo audit is booked via a warden's \"Available for hire\" function, both the warden and the project have direct access to Code4rena staff for assistance with:\n\n * Legal agreements\n\n * Scoping, pricing, and logistical planning\n\n * Collection and disbursement of funds\n\n * External accountability via a Code4rena Judge\n\n * This top-tier support you'll receive from the Code4rena team is built into the solo audit booking model, at only 20% of the total audit fee.\n\n##\n\nHow can I opt in to solo audits?\n\nIf you are a Certified warden, you can make yourself available for solo audits\nby logging in to your [user account on the Code4rena\nwebsite](https://code4rena.com/account/edit-profile), and opting in to the\n\"Available for hire\" function on the Settings screen.\n\n[PreviousSubmission policy](/roles/wardens/submission-policy)[NextAuthenticate\nwith your wallet](/roles/wardens/warden-auth)\n\nLast modified 2mo ago\n\nOn this page\n\nBenefits to projects and wardens\n\nHow can I opt in to solo audits?\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/sponsors.json b/qa_bot/knowledge_base/c4/docs/sponsors.json new file mode 100644 index 0000000..77fdee0 --- /dev/null +++ 
b/qa_bot/knowledge_base/c4/docs/sponsors.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/roles/sponsors", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Audit process](/roles/sponsors/contest-process)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Sponsors\n\nSponsors purchase a competitive audit, which includes an award pot to\nincentivize wardens to audit their project.\n\n##\n\nSponsoring an audit\n\nAny project can submit a request to sponsor a C4 audit. 
Just [complete this\nform](https://code4rena.typeform.com/i-want-an-audit) and we will reach out to\nset up a meeting or send over a scoping questionnaire to get the ball rolling.\n\nOne of our team members will review your repo, assess your responses and\ncontracts, and recommend which audit package would be appropriate for your\nscope.\n\n **If you decide to move ahead with an audit, all relevant code will be made\npublic at the time of your audit in most cases.** We also offer KYC and\nprivate competitions if privacy is a need; just let our team know.\n\n###\n\nScoping\n\nOur scoping form asks for several technical details to help our team assess\nthe scope of your audit. There are several scoping considerations beyond a\nsimple sLOC count. Here are two we're often asked about:\n\n 1. 1.\n\n **Lines of Code count:** Please [run the `prettier`\nplugin](https://github.com/prettier-solidity/prettier-plugin-solidity)\nconfigured to a 100-character line length before counting LOCs. (You don't\nneed to commit these changes to your repo; it's just for getting a\nstandardized LOC count.)\n\n 2. 2.\n\n **Test coverage %:** If you have less than 80% test coverage on your\ncontracts, we strongly advise booking a [Test Coverage\ncompetition](https://code4rena.com/test-coverage) immediately prior to your\nCode4rena audit. 
Doing this can drastically improve the quality of your audit\nby reducing the number of invalid submissions, incentivizing top-performing\nwardens, and typically saves you time overall by speeding up the judging and\nreview phase of your audit.\n\nThere are other benefits, too, [all outlined\nhere](https://medium.com/code4rena/new-to-code4rena-test-\ncoverage-c548645404f9).\n\n####\n\nRunning `prettier` to provide a standardized LOC count\n\nThe default command to run `prettier` is `prettier --write contracts/**/*.sol`\nor `npx prettier -w $(find contracts src -name \"*.sol\" | grep -v \\.t\\.sol)`\n\nWe use a 100-character line length standard for scoping, and our default\n`.prettierrc` is:\n\n{\n\n\"overrides\": [\n\n{\n\n\"files\": \"*.sol\",\n\n\"options\": {\n\n\"printWidth\": 100,\n\n\"tabWidth\": 4,\n\n\"useTabs\": false,\n\n\"singleQuote\": false,\n\n\"bracketSpacing\": false,\n\n}\n\n}\n\n]\n\n}\n\n###\n\nDetermining pot size\n\nTo attract warden participation in the highly competitive engineering market,\nwe work with standard award pool sizes based on the scope of the audit. We\nregularly evaluate and adjust audit pricing to ensure incentive alignment with\nwardens. Sponsors always have the option of boosting their award pool, which\ntends to attract more warden talent and attention.\n\n###\n\nAnalysis pool\n\n5% of each audit's award pool is typically allocated to Analyses. These\nreports contain high-level advice and review of the code: the \"forest\" to\nindividual findings' \"trees.\" They augment and contextualize the bug reports\nthat are incentivized by the remaining 95% of the pool.\n\nFor a long time, wardens have wanted a better place to contribute value via\nthe high-level / overview / advice that isn't necessarily covered by specific\nbugs. 
The Analysis pool provides them with a method to get credit for this\nadvisory-level work.\n\nProjects have discretion to adjust the default allocation for the Analysis\npool up or down; this should be clarified during the pre-audit booking and\nsetup phase.\n\n###\n\nGas optimization pool\n\nBy default, 2.5% of the award pool is allocated to valid gas optimizations. We\nencourage all sponsors to keep this in place, as we can help each other be\nconscious of ways to minimize gas fees for users -- and indeed some sponsors\nmay wish to allocate a higher percentage of the award pool to this purpose.\n\nSome projects may not wish to create a separate incentive for gas\noptimizations, and removing it should be discussed with Code4rena staff during\nthe pre-audit setup phase.\n\n###\n\nOrg fee\n\nThere is a fee on top of the determined audit pool, which goes to the\nCode4rena DAO to cover the costs associated with organizing, promoting, and\nreporting on audits.\n\n###\n\nAudit scheduling\n\nOur standard, one-week audits start and end on weekdays at 20:00:00 UTC. 
Due\nto high demand, we only lock audits into the schedule after receiving a\ndeposit for the audit; we are unable to make scheduling commitments otherwise.\n\nNote that in order to provide your team with the most efficient and effective\ncode review, we require your team to add ALL code, documentation, and notes to\nyour audit repo at least 2 business days prior to your audit start time.\n\n[PreviousTools and resources](/roles/wardens/tools-and-resources)[NextAudit\nprocess](/roles/sponsors/contest-process)\n\nLast modified 3d ago\n\nOn this page\n\nSponsoring an audit\n\nScoping\n\nDetermining pot size\n\nAnalysis pool\n\nGas optimization pool\n\nOrg fee\n\nAudit scheduling\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/submission-policy.json b/qa_bot/knowledge_base/c4/docs/submission-policy.json new file mode 100644 index 0000000..463e8c0 --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/submission-policy.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/roles/wardens/submission-policy", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Submission policy](/roles/wardens/submission-policy)\n\n[Solo audits](/roles/wardens/solo-audits)\n\n[Authenticate with your wallet](/roles/wardens/warden-auth)\n\n[Tools and resources](/roles/wardens/tools-and-resources)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther 
Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Submission policy\n\nCode4rena is an open organization committed to improving the security of\ndecentralized protocols while protecting the information of our sponsors and\nparticipants. This policy is intended to provide C4 Wardens (security\nresearchers) clear guidelines for participating in code audits while\nconducting vulnerability discovery activities.\n\nThe following policy conveys C4's preferences in how to submit discovered\nvulnerabilities to the organization and describes what systems and types of\nresearch are covered under this policy, how to share vulnerability reports,\nand the length of time we expect Wardens to wait prior to publicly disclosing\nvulnerabilities.\n\nReports can be submitted at any point prior to stop time for a given audit.\nThe details for each code audit can be found through [the Code4rena\nwebsite](https://code4rena.com/).\n\nAll community members agree to be bound by the Code4rena Code of Conduct,\nwhich can be viewed [in the Code4rena\nDiscord](https://discord.com/channels/810916927919620096/851883682470166558/851891396255940618).\n\n##\n\nAudit contest guidelines\n\nUnder this policy, audit contests covers activities in which you:\n\n * Register as a C4 Warden within an individual capacity or as part of a team.\n\n * Submit your bug report using the submission form.\n\n * Make every effort to avoid privacy violations, degradation of user experience, disruption to production systems, and destruction or manipulation of data, especially in regard to funds.\n\n * Only use exploits to the extent necessary to confirm a vulnerability's presence. 
Do not use an exploit to compromise funds, exfiltrate data, establish persistent permissioning access, or use the exploit to redirect to other systems.\n\n * Unless explicitly noted by the affiliated sponsor, **wait until the audit report has been published** before you disclose it publicly.\n\n * Do not submit a high volume of low-quality reports.\n\nIn the event that you encounter a critical vulnerability that the sponsor\nproject would want to know about, even before the end of the audit, please\nrefer to [\"How to submit Zero-day or otherwise highly sensitive\nbugs.\"](/roles/wardens/submission-policy#how-to-submit-zero-day-or-otherwise-\nhighly-sensitive-bugs)\u200b\n\n> Without explicit permission from sponsors, publishing or discussing findings\n> publicly prior to report publication is grounds for immediate forfeit of\n> award and disqualification from any future C4 events and activities.\n\n##\n\nSubmitting a report\n\nC4 accepts vulnerability reports via the audit submission form.\n\nIn order to help us triage and prioritize submissions, please ensure that your\nreports:\n\n * Are submitted no later than the audit stop time.\n\n * Use the audit submission process.\n\n * Follow the correct report format. (See next section.)\n\n * Describe the location the vulnerability was discovered and the potential impact of exploitation.\n\n * Offer a detailed description of the steps needed to reproduce the vulnerability (proof of concept scripts or screenshots are helpful).\n\n * Have not been surfaced as \"known issues\" (see audit repo `README` for details).\n\n * Are written in English, if possible.\n\nIt is also recommended to ensure you receive email confirmation of each\nsubmission. 
(If you do not see an email confirmation, please check your spam\nfolder.)\n\n###\n\nReport format\n\n * Medium or High severity findings should be submitted individually.\n\n * All QA findings (Low risk or Non-critical) must be submitted as a single QA report per warden (or team).\n\n * All Gas optimizations must be submitted as a single Gas report per warden (or team).\n\nWardens who submit multiple QA and/or Gas findings to a single audit without\nfollowing the required format will have all QA/Gas submissions invalidated for\nthat audit.\n\nFor more details on QA and Gas reports, and estimating risk, please see\n[Judging Criteria](https://docs.code4rena.com/roles/wardens/judging-\ncriteria#qa-reports-low-non-critical).\n\n###\n\nBurden of proof\n\nWardens have the burden of proof in submissions. Explaining and rationalizing\nthe potential impact is an essential part of a quality submission. The burden\nof proof increases based on the potential value of the submission (rarity,\nseverity).\n\nInsufficient proof shall be defined as the judge needing to do additional\nresearch or coding in order to validate the claims made in the submission.\nTherefore it is recommended to have a coded proof of concept for high severity\nfindings in order to make it easy for a judge to validate your case.\n\nSubmissions which judges deem insufficiently proven will not be eligible for\nanything higher than a satisfactory score.\n\n###\n\nHow to include a proof of concept\n\nTo include a proof of concept (PoC) link in your submission, please follow\nthese steps, to ensure that your PoC remains private for the duration of the\naudit, but can be accessed publicly after the findings are made public:\n\n 1. 1.\n\nModify existing test files\n\n 2. 
2.\n\nProvide the diff\n([instructions](https://gist.github.com/IllIllI000/21deaa6a55c95a6ec9ca893009ee494f)),\nwhich can be applied\n\n###\n\nLate submissions\n\nC4 does not accept late submissions under any circumstances; the audit\ndeadlines are firm. We recommend that you submit your findings at least a few\nminutes before the cut-off time, since the submission form can become slow or\nunresponsive in the final minutes of an audit, due to high traffic.\n\n###\n\nSubmissions to the wrong audit\n\nC4 cannot \"transfer\" your submission to another audit after the audit ends. If\nyou discover that you have accidentally submitted a finding to the wrong\naudit, please re-submit it to the correct audit, and then follow the steps\nbelow to withdraw your report from the other audit.\n\n###\n\nHow to submit Zero-day or otherwise highly sensitive bugs\n\nIf you discover a highly sensitive bug, e.g. a high-severity vulnerability\naffecting deployed contracts, please follow these steps:\n\n 1. 1.\n\nSubmit a placeholder finding using the audit submission form, with a non-\nspecific title (e.g. \"Potentially sensitive issue - disclosed privately\")\n\n 2. 2.\n\nWhile logged in to the Code4rena website, [submit a Help Desk\nrequest](https://code4rena.com/help/), and select \"Sensitive disclosure\" for\n\"What type of problem do you need help with?\" Please include: a. Name of\naudit, and b. Link to a private Gist containing the finding.\n\nCode4rena staff will review the issue immediately with the judge and sponsor,\nand will ensure the submission is added to the audit repo after any immediate\nrisks have been addressed.\n\n###\n\nFindings in \"parent\" of forked projects\n\nIf an issue is discovered during an audit that relates to the \"parent\" of a\nforked project, wardens should disclose the finding to the parent project\nfirst, and submit a placeholder finding to the C4 audit. 
Guidelines:\n\n * **Do not** disclose the parent / third party name within the body of the finding issue.\n\n * **Do** include a hash of the issue\n\nIt is the warden's responsibility to follow up with Code4rena in a timely\nmanner, based on what they hear back from the original project.\n\n###\n\nUse of writing assistance software, ChatGPT output, etc\n\nAs a professional audit platform, Code4rena's bar for a satisfactory\nsubmission is that it is **as good as one might find in a professional audit\nreport.**\n\nUsing the output of ChatGPT, GPT-3, or automated tools for audit submissions\nis highly discouraged as it leads to a high ratio of nonsense submissions.\nWardens are responsible for verifying the validity and clarity of their own\nsubmissions. Sending multiple poor quality submissions in a single audit will\nresult in all of your audit submissions being ruled invalidated. Additional\npenalties may also be applied at the discretion of judges and C4 staff.\n\nWe are aware this privileges native English speakers as online translation\nservices can result in unclear wordings; therefore, a submission should not be\nmarked as unsatisfactory purely based on grammar and spelling which does not\ninterfere with a judge's ability to understand the submission.\n\nJudges must make the best decision they can regarding quality and\nunderstandability of findings.\n\n###\n\nAutomated findings considered known issues\n\n * At the start of each audit, Code4rena runs a [Bot Race](https://www.code4rena.com/register/bot) where wardens compete to see whose AI-driven bot can create the highest quality and most thorough audit report.\n\n * The winning report is shared with all C4 wardens within 24 hours of the audit start time, both in the audit repo and in the audit's Discord channel.\n\n * All findings in the winning Bot Report will be declared publicly known issues, and therefore ineligible for awards.\n\nWardens may use automated tools as a first pass, and build on these 
findings\nto identify High and Medium severity issues (\"HM issues\"). However,\nsubmissions based on automated tools will have a higher burden of proof for\ndemonstrating to sponsors a relevant HM exploit path in order to be considered\nsatisfactory.\n\n##\n\nEditing a report\n\nTo edit a submitted finding in an open audit:\n\n 1. 1.\n\nSign into https://code4rena.com with your wallet.\n\n 2. 2.\n\nFind the audit on the C4 Audit page and click \"view competition\"\n\n 3. 3.\n\nClick on the \"Findings\" tab. There you will see a list of all your submissions\nfor that audit (both individual and team findings).\n\n 4. 4.\n\nSelect a finding from the list, make your edits and re-submit.\n\nFindings can be edited until the audit deadline.\n\n##\n\nWithdrawing a report\n\nIt is possible that a warden might want to withdraw a report after submitting\nit through the website. For example, if a new warden realizes they have not\nfollowed the report submission guidelines closely, or discover that a\nsubmission was outside the scope of the audit.\n\nIn this situation, wardens who wish to have a report withdrawn should:\n\n 1. 1.\n\nSign into https://code4rena.com with your wallet.\n\n 2. 2.\n\nFind the audit on the C4 Audit page and click \"view competition\"\n\n 3. 3.\n\nClick on the \"Findings\" tab. There you will see a list of all your submissions\nfor that audit (both individual and team findings).\n\n 4. 4.\n\nSelect a finding from the list, and choose the \"withdraw\" option.\n\nSubmissions must be withdrawn before the audit deadline.\n\n##\n\nUnauthorized test methods\n\nThe following methods are not authorized means of testing within C4 code\naudits:\n\n * Testing exploits on mainnet.\n\n * Network denial of service (DoS or DDoS) tests or other tests that impair access to or damage a system or data.\n\n * Physical testing (e.g. office access, open doors, tailgating), social engineering (e.g. 
phishing, vishing), or any other non-technical vulnerability testing.\n\n##\n\nQuestions\n\nQuestions regarding this policy can be addressed in the `#questions` channel\non the [C4 Discord](https://discord.gg/Dr6p5KDCdG). We also invite you to\ncontact us with suggestions for improving this policy.\n\n##\n\nAuthorization\n\nIf you make a good faith effort to comply with this policy during your\nsecurity research, C4, its affiliates, and sponsors will consider your\nresearch to be authorized.\n\nThe C4 community will work with you to understand and resolve any issues\nquickly, and C4, its affiliates, and sponsors will not recommend or pursue\nlegal action related to your research.\n\nShould legal action be initiated by a third party against you for activities\nthat were conducted in accordance with this policy, C4 will make this\nauthorization known.\n\n[Roles - PreviousWardens](/roles/wardens)[NextSolo\naudits](/roles/wardens/solo-audits)\n\nLast modified 2mo ago\n\nOn this page\n\nAudit contest guidelines\n\nSubmitting a report\n\nReport format\n\nBurden of proof\n\nHow to include a proof of concept\n\nLate submissions\n\nSubmissions to the wrong audit\n\nHow to submit Zero-day or otherwise highly sensitive bugs\n\nFindings in \"parent\" of forked projects\n\nUse of writing assistance software, ChatGPT output, etc\n\nAutomated findings considered known issues\n\nEditing a report\n\nWithdrawing a report\n\nUnauthorized test methods\n\nQuestions\n\nAuthorization\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/tools-and-resources.json b/qa_bot/knowledge_base/c4/docs/tools-and-resources.json new file mode 100644 index 0000000..c264e45 --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/tools-and-resources.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/roles/wardens/tools-and-resources", "md_content": 
"[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Submission policy](/roles/wardens/submission-policy)\n\n[Solo audits](/roles/wardens/solo-audits)\n\n[Authenticate with your wallet](/roles/wardens/warden-auth)\n\n[Tools and resources](/roles/wardens/tools-and-resources)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Tools and resources\n\n###\n\n\ud83e\uddf0 Tools\n\n * \u200b[Hardhat](https://hardhat.org)\u200b\n\n * \u200b[Scaffold](https://github.com/austintgriffith/scaffold-eth)\u200b\n\n * \u200b[Solidity Visual Auditor](https://marketplace.visualstudio.com/items?itemName=tintinweb.solidity-visual-auditor)\u200b\n\n * \u200b[Remix](https://remix.ethereum.org)\u200b\n\n * \u200b[Mythril](https://github.com/ConsenSys/mythril)\u200b\n\n###\n\n\ud83d\udd2c Resources\n\n * \u200b[The Secureum](https://www.secureum.xyz/) \\- Ethereum security bootcamp\n\n * \u200b[Solidity Docs](https://docs.soliditylang.org/en/v0.8.1/)\u200b\n\n * \u200b[Vyper Docs](https://vyper.readthedocs.io/en/stable/)\u200b\n\n * \u200b[Solidity By Example](https://solidity-by-example.org)\u200b\n\n * \u200b[Smart Contract Weakness 
Classification](https://swcregistry.io)\u200b\n\n * \u200b[How to become a smart contract auditor](https://cmichel.io/how-to-become-a-smart-contract-auditor/), by cmichel (C4 [leaderboard](https://code4rena.com/leaderboard/) warden)\n\n * \u200b[Koios courses](https://app.koios.world/#/worlds) on blockchain technology and programming DAPPs\n\n * \u200b[Capture the Ether](https://capturetheether.com)\u200b\n\n * \u200b[CryptoZombies](https://cryptozombies.io) \\- Solidity tutorial\n\n[PreviousAuthenticate with your wallet](/roles/wardens/warden-auth)[Next \\-\nRolesSponsors](/roles/sponsors)\n\nLast modified 1yr ago\n\nOn this page\n\n\ud83e\uddf0 Tools\n\n\ud83d\udd2c Resources\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/warden-auth.json b/qa_bot/knowledge_base/c4/docs/warden-auth.json new file mode 100644 index 0000000..c722681 --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/warden-auth.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/roles/wardens/warden-auth", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Submission policy](/roles/wardens/submission-policy)\n\n[Solo audits](/roles/wardens/solo-audits)\n\n[Authenticate with your wallet](/roles/wardens/warden-auth)\n\n[Tools and resources](/roles/wardens/tools-and-resources)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit 
timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Authenticate with your wallet\n\nIf you competed in C4 audits prior to October 5, 2022 and have not previously\nregistered your username via wallet authentication, you must register your\naccount.\n\n##\n\nAuthenticate with your wallet\n\nYou can authenticate with your wallet using either MetaMask or WalletConnect\nand submit findings without re-entering the same info over and over. After you\nregister you can have the option to log in with either your wallet or your\nusername and password.\n\nIf you have an existing Code4rena username, you can re-register your warden\nhandle at https://code4rena.com/ by connecting your wallet from the \"connect\"\ndropdown.\n\n##\n\nAuthenticate with username and password\n\nIf you're a new warden, you can choose to sign up with a username and password\nonly. **If you choose this option, you won't be able to link a wallet to your\naccount for authentication purposes later.**\n\n##\n\nHow to connect your wallet and submit findings\n\n * Go to https://code4rena.com/ and hover over the \"connect\" dropdown.\n\n * Choose whether to connect with MetaMask or WalletConnect, or sign up with just a username and password.\n\n * If you were a registered warden and submitted valid findings before May 25, 2022, you need to connect your wallet. 
Make sure to connect with an address you've used previously to receive earnings from code4rena audits prior to May 25, 2022.\n\n * Follow the prompts to connect your C4 username with your wallet.\n\n * Check your email for further instructions.\n\n##\n\nOnce your request to connect your wallet is complete and approved:\n\n * Sign in with the same method you used to register.\n\n * Proceed to the audit of your choice and submit a finding as usual.\n\n##\n\nTeams\n\nOnce individual team members are authenticated, they will be able to submit\nfindings as individuals or on behalf of the team.\n\n##\n\nFAQ / Troubleshooting\n\n###\n\n **If you were a registered warden prior to June 28, 2022 and you see this\nerror when you fill out the registration form:**\n\n`[your handle] is already a registered username`\n\nIf you submitted valid findings before May 25, 2022, please choose either\nMetaMask or WalletConnect from the \"connect\" dropdown.\n\n 1. 1.\n\nIf you did that and ended up on the registration page, please make sure you\nconnect with a wallet address you've used for a Code4rena contest prior to May\n25, 2022.\n\n 2. 
2.\n\nIf that doesn't work, please [submit a Help Desk\nrequest](https://code4rena.com/help/).\n\n###\n\n **If your registration was merged but you still can't log in:**\n\nIf you registered with a wallet, make sure you are authenticating with the\nsame address you used when you registered.\n\nIf that doesn't work, please [submit a Help Desk\nrequest](https://code4rena.com/help/).\n\n###\n\n **If you previously submitted findings, but your wallet is not supported by\nWalletConnect:**\n\nThe best solution here is for you to import your wallet to either MetaMask, or\nanother wallet app that is supported by WalletConnect.\n\nIf you can't do that, then [submit a Help Desk\nrequest](https://code4rena.com/help/) and C4 staff will help you get your\naccount registered.\n\n###\n\n **Can I change the wallet address where I receive awards?**\n\nYou can change your payment information any time. Simply log in and go to\n[your account management page](https://code4rena.com/account).\n\nNote: for each contest, C4 distributes awards to the payment address on file\n_at the time of award calculation_.\n\n###\n\n **Can I change the wallet address I log in with?**\n\nUnfortunately due to some restrictions in Moralis, changing addresses is not\ncurrently supported as well as we'd like it to be in the future. There is one\nexception: if you use MetaMask, you can link multiple addresses to your C4\naccount.\n\nHowever, **it's critical that you do not try to log in with the new address\nthat you want to link before you link it.** Attempting to connect while signed\nin to the new MetaMask address you want to connect will create a new user\nobject. That new user object will block your ability to associate that address\nwith your C4 account. 
Please watch the video below for details.\n\nTo link multiple addresses in MetaMask, please follow the instructions in this\nvideo:\n\nLinking Wallets with MetaMask (works only with MetaMask)\n\n###\n\n **Can I change my login method after I register?**\n\nIf you registered with your wallet, you can have the option to log in with\neither your wallet or your username and password.\n\nIf you registered prior to October 5, 2022, you can set your password by going\nto your [account management page](https://code4rena.com/account) and clicking\non the \"reset password\" button. This will send you an email with a password\nreset link. After you set your password, you can log in with your c4 username\nand password immediately.\n\nIf you registered with only a password and not a wallet, you will only be able\nto log in with your username and password.\n\n###\n\n **What if my wallet was hacked?**\n\nPlease follow these steps:\n\n 1. 1.\n\nIf you are not logged in and you haven't set up your password yet, click \"Log\nin\" from the connect dropdown and then click \"forgot password\" to get a\npassword reset link\n\n 2. 2.\n\nLog in with your username and password\n\n 3. 3.\n\nUpdate your payment addresses from the account page\n\n 4. 
4.\n\nSubmit a help request through our [Help Desk](https://code4rena.com/help/)\nwhile logged in so that we can remove the hacked wallet from your account.\n\n[PreviousSolo audits](/roles/wardens/solo-audits)[NextTools and\nresources](/roles/wardens/tools-and-resources)\n\nLast modified 2mo ago\n\nOn this page\n\nAuthenticate with your wallet\n\nAuthenticate with username and password\n\nHow to connect your wallet and submit findings\n\nOnce your request to connect your wallet is complete and approved:\n\nTeams\n\nFAQ / Troubleshooting\n\nIf you were a registered warden prior to June 28, 2022 and you see this error\nwhen you fill out the registration form:\n\nIf your registration was merged but you still can't log in:\n\nIf you previously submitted findings, but your wallet is not supported by\nWalletConnect:\n\nCan I change the wallet address where I receive awards?\n\nCan I change the wallet address I log in with?\n\nCan I change my login method after I register?\n\nWhat if my wallet was hacked?\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/wardens.json b/qa_bot/knowledge_base/c4/docs/wardens.json new file mode 100644 index 0000000..6420ac2 --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/wardens.json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/roles/wardens", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Submission policy](/roles/wardens/submission-policy)\n\n[Solo audits](/roles/wardens/solo-audits)\n\n[Authenticate with your wallet](/roles/wardens/warden-auth)\n\n[Tools and resources](/roles/wardens/tools-and-resources)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and 
validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Wardens\n\nWardens protect the DeFi ecosystem from threats by auditing code.\n\nCode4rena audits let people of a wide range of skill levels get rewarded while\nshowcasing their talent in order to make the DeFi ecosystem more secure.\n\n\u2728Stay up to date with new audits by [following C4 on\nTwitter](https://twitter.com/code4rena) and joining our [community\nDiscord](https://discord.gg/EY5dvm3evD).\n\n###\n\nJoining an audit\n\nAnyone can apply to participate in an audit. [Apply using this\nform](https://code4rena.com/register/), then pop into Discord and give us a\nhowl in `#`[`i-want-to-be-a-warden`](https://discord.gg/pmGC2gH9Mg).\n\nOnce you've completed those steps and been approved, [have a look at the C4\nwebsite](https://code4rena.com), where you'll find a list of open and upcoming\naudits, along with their pot size, start and end date, and other relevant\ninformation. 
Active audits will typically include a link to the code repo, as\nwell as the submission form for findings.\n\nAs a reminder, for Wardens participating in code audits, please familiarize\nyourself with the [submission policy](/roles/wardens/submission-policy) and\n[judging criteria](/awarding/judging-criteria) prior to participating.\n\n###\n\nRegistering a team\n\nTo register a team, you must first register your individual warden handles,\nand then register your team [here](https://code4rena.com/register/team/).\n\nOnce a team is created, you have the ability to add/remove members and update\nyour payment address while logged in to the Code4rena website.\n\nAll team registrations and updates will create pull requests that are flagged\nfor the C4 team to review and approve. Please allow 24-48 business hours for\nprocessing.\n\n\u2757\ufe0f **Important note: Team awards are sent as a single payment to** ** _one_**\n**wallet.** We strongly recommend using a multisig wallet, or a tool like\n[PaymentSplitter](https://docs.openzeppelin.com/contracts/4.x/api/finance#PaymentSplitter),\nto distribute awards among your team members. Note that C4 does not track\nwhich team member submitted each finding; your team is responsible for keeping\ntrack of that information, and distributing awards. 
The team structure at C4\nis designed so that you submit as a team and get paid as a team.\n\n###\n\nAudit timeline\n\n * **Most audits run for 3-7 days,** and typically start and end at 20:00 UTC.\n\n * The rest of our audit timeline is documented on the [Audit timeline](/structure/our-process) page.\n\n###\n\nQuestions?\n\nWhen audit sponsors come to Code4rena for an audit, we always encourage them\nto provide documentation, and to make themselves available for questions, so\nthey get the most out of their audit.\n\nWhen a sponsor designates a team member who is available for questions, that\nperson will introduce themselves in the C4 Discord (in an audit-specific\nchannel), and we recommend contacting them via DM with any questions you may\nhave.\n\n###\n\n\u23e9 TL;DR\n\n * Turn in your reports before the audit end time.\n\n * For each audit, submit your Medium and High risk findings individually.\n\n * Bundle all of your low-risk and non-critical findings into a _single_ QA report.\n\n * Similarly, list _all_ of your gas optimizations together in a single Gas report.\n\n * Be sure to [register your handle and Polygon address](https://code4rena.com/register/) to receive your share.\n\n * Publicly disclosing (e.g. 
publishing or discussing) any discovered bugs or vulnerabilities before the audit report has been published is grounds for disqualification from all C4 events.\n\n[PreviousCode4rena](/)[NextSubmission policy](/roles/wardens/submission-\npolicy)\n\nLast modified 3d ago\n\nOn this page\n\nJoining an audit\n\nRegistering a team\n\nAudit timeline\n\nQuestions?\n\n\u23e9 TL;DR\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/docs/where-can-i-find....json b/qa_bot/knowledge_base/c4/docs/where-can-i-find....json new file mode 100644 index 0000000..7fe3eac --- /dev/null +++ b/qa_bot/knowledge_base/c4/docs/where-can-i-find....json @@ -0,0 +1 @@ +{"url": "https://docs.code4rena.com/structure/where-can-i-find...", "md_content": "[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nSearch\n\n\u2303K\n\n[Code4rena](/)\n\nRoles\n\n[Wardens](/roles/wardens)\n\n[Sponsors](/roles/sponsors)\n\n[Judges](/roles/judges)\n\n[Certified contributors](/roles/certified-contributors)\n\nAwarding\n\n[Incentive model and awards](/awarding/incentive-model-and-awards)\n\n[Judging criteria](/awarding/judging-criteria)\n\n[Fairness and validity](/awarding/fairness-and-validity)\n\nPhilosophy\n\n[Security is about people](/philosophy/security-is-about-people)\n\n[The culture we're building](/philosophy/how-we-work)\n\n[Intentionally structured](/philosophy/intentionally-structured)\n\nOther Details\n\n[FAQ](/structure/frequently-asked-questions)\n\n[Audit timeline](/structure/our-process)\n\n[Where can I find\u2026?](/structure/where-can-i-find...)\n\n[Powered By\nGitBook](https://www.gitbook.com/?utm_source=content&utm_medium=trademark&utm_campaign=-MYGYvqTD29_fAaod9NJ)\n\n# Where can I find\u2026?\n\nThis page aims to point to all the sources of information regarding C4.\n\n###\n\nInformation\n\n * \u200b[Current and scheduled audits](https://code4rena.com) (C4 website)\n\n * \u200b[Audit reports](https://code4rena.com/reports/) (C4 website)\n\n * \u200b[Past findings 
and audit code](https://github.com/code-423n4) (GitHub)\n\n * \u200b[Award calculation tool](https://github.com/code-423n4/awardcalc) (GitHub)\n\n###\n\nTools and channels\n\nPublic:\n\n * \u200b[Public documentation](https://docs.code4rena.com) (GitBook)\n\n * \u200b[Community chat](https://discord.gg/EY5dvm3evD) (Discord)\n\n * \u200b[Proposals and requests for comment](https://forum.code4rena.com) (forum)\n\nPrivate:\n\n * Meeting notes, early ideas (The Greenhouse)\n\n * Operations manual (Notion)\n\n * C4 Team, Warden, Sponsors, and Judges channels (Discord)\n\n###\n\nThings that don't currently exist in a great way but should:\n\n * How I can get involved in C4 beyond participating in an audit?\n\n * What's the status of current judging / awarding / reporting?\n\n * Criteria for scoping and how we've scoped past audits\n\n###\n\nWhat else?\n\nIf there's some piece of information you're looking for, it may be that it's\nnot public, but you can likely get access to it. Feel free to reach out in the\n**#questions** channel in Discord.\n\n[Other Details - PreviousAudit timeline](/structure/our-process)\n\nLast modified 2mo ago\n\nOn this page\n\nInformation\n\nTools and channels\n\nThings that don't currently exist in a great way but should:\n\nWhat else?\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/2023-01-reserve-contest.json b/qa_bot/knowledge_base/c4/website/2023-01-reserve-contest.json new file mode 100644 index 0000000..83d57d5 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/2023-01-reserve-contest.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/contests/2023-01-reserve-contest", "md_content": "Introducing Code4rena Profiles: a solo auditor's highlight reel.[Learn more\n->](/how-it-works/wardens)\n\nSkip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog 
inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/2023-05-chainlink-cross-chain-services-ccip-and-arm-network.json b/qa_bot/knowledge_base/c4/website/2023-05-chainlink-cross-chain-services-ccip-and-arm-network.json new file mode 100644 index 0000000..e6deccd --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/2023-05-chainlink-cross-chain-services-ccip-and-arm-network.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/contests/2023-05-chainlink-cross-chain-services-ccip-and-arm-network", "md_content": "Introducing Code4rena Profiles: a solo auditor's highlight reel.[Learn more\n->](/how-it-works/wardens)\n\nSkip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/@0x52.json b/qa_bot/knowledge_base/c4/website/@0x52.json new file mode 100644 index 0000000..1e2aa5b --- /dev/null +++ 
b/qa_bot/knowledge_base/c4/website/@0x52.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@0x52", "md_content": "Skip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# 0x52\n\nwarden\n\nOG Warden\n\nAvailable for Hire\n\nI find bugs | [@SpearbitDAO](https://twitter.com/SpearbitDAO) LSR | Discord:\n0x52#0542\n\n## Highlights\n\n * Made $680k~ in my first year of smart contract auditing\n\nFirst year stats: Made ~$680k, audited 115 codebases, found ~140 high risk\nvulnerabilities and ~250 medium, spent ~1300 hours reviewing code, created 267\nfiles in remix, drank ~90 gallons of pre-workout (my caffeinated beverage of\nchoice).\n\n * Ranked #13 on Code4rena all-time leaderboard\n\nAs of June 2023, I'm ranked #13 on Code4rena's all-time leaderboard. With 205\nfindings in total, I've earned $214k+ on the platform.\n\n## 0x52's Activity\n\nThe Activity Stream is currently in beta. 
[Share feedback here](/help).\n\n## Available for hire\n\n[Get a quote](https://code4rena.typeform.com/solo-\naudit#warden=0x52&source=0x52-profile)\n\n * [Twitter](https://twitter.com/IAm0x52)\n\n## Helped Secure\n\n[](https://twitter.com/mimo_labs \"Mimo\nDeFi\")[](https://twitter.com/aurafinance \"Aura\nFinance\")[](https://twitter.com/FactDAO/\n\"FactoryDAO\")[](https://twitter.com/infinitydotxyz \"Infinity NFT\nMarketplace\")[](https://twitter.com/olympusdao \"Olympus\nDAO\")[](https://twitter.com/fractional_art\n\"Fractional\")[](https://twitter.com/axelarcore \"Axelar\nNetwork\")[](https://twitter.com/nounsbuilder \"Nouns\nBuilder\")[](https://twitter.com/feiprotocol\n\"Tribe\")[](https://twitter.com/QuickswapDEX \"QuickSwap and\nStellaSwap\")[](https://twitter.com/3xcalibur69\n\"3xcalibur\")[](https://twitter.com/holographxyz\n\"Holograph\")[](https://twitter.com/caviarAMM\n\"Caviar\")[](https://twitter.com/ParaSpace_NFT\n\"ParaSpace\")[](https://twitter.com/reserveprotocol\n\"Reserve\")[](https://twitter.com/AragonProject \"Aragon\nProtocol\")[](https://twitter.com/NeoTokyoCode \"Neo\nTokyo\")[](https://twitter.com/PolynomialFi \"Polynomial\nProtocol\")[](https://twitter.com/asymmetryfin \"Asymmetry\nFinance\")[](https://twitter.com/backdfund\n\"Backd\")[](https://twitter.com/PoolTogether_\n\"PoolTogether\")[](https://twitter.com/EnsoFinance \"Enso\nFinance\")[](https://twitter.com/SturdyFinance\n\"Sturdy\")[](https://twitter.com/callyfinance\n\"Cally\")[](https://twitter.com/AlchemixFi\n\"Alchemix\")[](https://twitter.com/forgottenrunes \"Forgotten\nRunes\")[](https://twitter.com/NotionalFinance\n\"Notional\")[](https://twitter.com/rubicondefi\n\"Rubicon\")[](https://twitter.com/nibblnft\n\"Nibbl\")[](https://twitter.com/velodromefi \"Velodrome\nFinance\")[](https://twitter.com/BadgerDAO\n\"BadgerDAO\")[](https://twitter.com/puttyfi\n\"Putty\")[](https://twitter.com/IlluminateFi\n\"Illuminate\")[](https://twitter.com/veTokenFinance 
\"veToken\nFinance\")[](https://twitter.com/yield\n\"Yield\")[](https://twitter.com/juiceboxETH\n\"Juicebox\")[](https://twitter.com/ConnextNetwork\n\"Connext\")[](https://twitter.com/CantoPublic\n\"Canto\")[](https://twitter.com/Rigor_HQ \"Rigor\nProtocol\")[](https://twitter.com/shapeshift\n\"Yieldy\")[](https://twitter.com/SwivelFinance\n\"Swivel\")[](https://twitter.com/fiatdao \"FIAT\nDAO\")[](https://twitter.com/foundation\n\"Foundation\")[](https://twitter.com/ensdomains\n\"ENS\")[](https://twitter.com/prtyDAO\n\"PartyDAO\")[](https://twitter.com/fraxfinance \"Frax\nFinance\")[](https://twitter.com/vtvlco \"VTVL\")[](https://twitter.com/golom_io\n\"Golom\")[](https://twitter.com/artgobblers \"Art\nGobblers\")[](https://twitter.com/blur_io \"Blur\nExchange\")[](https://twitter.com/Y2kFinance \"Y2k\nFinance\")[](https://twitter.com/sizemarkets\n\"SIZE\")[](https://twitter.com/Paladin_vote\n\"Paladin\")[](https://twitter.com/traderjoe_xyz \"Trader\nJoe\")[](https://twitter.com/debtdao \"Debt\nDAO\")[](https://twitter.com/looksrare\n\"LooksRare\")[](https://twitter.com/redactedcartel \"Redacted\nCartel\")[](https://twitter.com/escherxyz\n\"Escher\")[](https://twitter.com/prepo_io\n\"prePO\")[](https://twitter.com/backed_xyz \"Backed\nProtocol\")[](https://twitter.com/TigrisTrades \"Tigris\nTrade\")[](https://twitter.com/OndoFinance \"Ondo\nFinance\")[](https://twitter.com/biconomy\n\"Biconomy\")[](https://twitter.com/Popcorn_DAO\n\"Popcorn\")[](https://twitter.com/EthosReserve \"Ethos Reserve\")\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git 
a/qa_bot/knowledge_base/c4/website/@0xA5DF.json b/qa_bot/knowledge_base/c4/website/@0xA5DF.json new file mode 100644 index 0000000..2b7a152 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@0xA5DF.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@0xA5DF", "md_content": "Skip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n# 0xA5DF\n\nwarden\n\nOG Warden\n\nAvailable for Hire\n\nMove slow and break things\n\n## Highlights\n\n * [Placed 2nd on Reserve Protocol audit](https://code4rena.com/contests/2023-01-reserve-contest#top)\n\nIn this audit, I came 2nd and found 1 high and 3 medium severity\nvulnerabilities. In the following mitigation contest I came #1\n\n * Ranked #45 on Code4rena all-time leaderboard\n\nAs of June 2023, I'm currently ranked #45 on Code4rena's all time leaderboard,\nhaving earned $67k+ in awards (excluding invitational contests)\n\n## 0xA5DF's Activity\n\nThe Activity Stream is currently in beta. 
[Share feedback here](/help).\n\n## Available for hire\n\n[Get a quote](https://code4rena.typeform.com/solo-\naudit#warden=0xA5DF&source=0xA5DF-profile)\n\n * [Twitter](https://twitter.com/0xA5DF)\n\n## Helped Secure\n\n[](https://twitter.com/Rigor_HQ \"Rigor\nProtocol\")[](https://twitter.com/escherxyz\n\"Escher\")[](https://twitter.com/reserveprotocol\n\"Reserve\")[](https://twitter.com/nounsdao \"Nouns\nDAO\")[](https://twitter.com/puttyfi \"Putty\")[](https://twitter.com/juiceboxETH\n\"Juicebox\")[](https://twitter.com/fractional_art\n\"Fractional\")[](https://twitter.com/nounsbuilder \"Nouns\nBuilder\")[](https://twitter.com/prtyDAO\n\"PartyDAO\")[](https://twitter.com/fraxfinance \"Frax\nFinance\")[](https://twitter.com/vtvlco \"VTVL\")[](https://twitter.com/golom_io\n\"Golom\")[](https://twitter.com/holographxyz\n\"Holograph\")[](https://twitter.com/CantoPublic\n\"Canto\")[](https://twitter.com/chainlink\n\"Chainlink\")[](https://twitter.com/TigrisTrades \"Tigris\nTrade\")[](https://twitter.com/dripsnetwork \"Drips Protocol\")\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/@0xsomeone.json b/qa_bot/knowledge_base/c4/website/@0xsomeone.json new file mode 100644 index 0000000..d015312 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@0xsomeone.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@0xsomeone", "md_content": "Skip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# 0xsomeone\n\nwarden\n\nOG Warden\n\nHead of Security Services @ Omniscia Mainly focusing on smart contract\nsecurity.\n\n## Highlights\n\n * Ranked #24 on Code4rena all-time leaderboard\n\nAs of June 2023, I'm ranked #24 on Code4rena's all-time leaderboard, with\n$119k+ in awards.\n\n * Placed 1st in Opensea Seaport 
1.2 Code4rena audit\n\nIn this audit, I was the only person to find a high or medium severity\nvulnerability in the codebase.\n\n## 0xsomeone's Activity\n\nThe Activity Stream is currently in beta. [Share feedback here](/help).\n\n * [Github](https://github.com/alex-ppg)\n\n## Helped Secure\n\n[](https://meebits.larvalabs.com/ \"LarvaLabs Meebits\")[](https://nftx.org/\n\"NFTX\")[](https://twitter.com/AlchemixFi\n\"Alchemix\")[](https://twitter.com/TigrisTrades \"Tigris\nTrade\")[](https://maple.finance/ \"Maple Finance\")[](https://twitter.com/yield\n\"Yield\")[](https://twitter.com/EnsoFinance \"Enso\nFinance\")[](https://twitter.com/rubicondefi\n\"Rubicon\")[](https://twitter.com/aurafinance \"Aura\nFinance\")[](https://twitter.com/opensea\n\"OpenSea\")[](https://twitter.com/AstariaXYZ\n\"Astaria\")[](https://twitter.com/EthosReserve \"Ethos\nReserve\")[](https://twitter.com/AragonProject \"Aragon Protocol\")\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/@HollaDieWaldfee.json b/qa_bot/knowledge_base/c4/website/@HollaDieWaldfee.json new file mode 100644 index 0000000..dc30f33 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@HollaDieWaldfee.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@HollaDieWaldfee", "md_content": "Skip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n * An open organization\n * 
[Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n# HollaDieWaldfee\n\nwarden\n\nOG Warden\n\nAvailable for Hire\n\nLead Senior Watson [@sherlockdefi](https://twitter.com/sherlockdefi), Senior\nAuditor for [Trust Security](http://trust-security.xyz) and OG Warden\n[@code4rena](https://twitter.com/code4rena)\n\nMy audits for Sherlock are performed under the alias \"roguereddwarf\".\n\nCheck out my work in my [Audit\nPortfolio](https://github.com/HollaDieWaldfee100/audits)\n\n## Highlights\n\n * Ranked #4 on Code4rena 2023 leaderboard\n\nAs of June 2023, I'm currently ranked #4 on Code4rena's 2023 leaderboard. So\nfar, I've earned $53k+ in awards, and have found 12 high and 20 medium\nseverity vul\n\n * [Placed 1st in Perennial's Sherlock audit](https://audits.sherlock.xyz/contests/79/leaderboard)\n\nI was the Lead Senior Watson for this audit and placed 1st with 4 solo medium\nfindings.\n\n * [Placed 1st in DODO's Sherlock audit](https://audits.sherlock.xyz/contests/78/leaderboard)\n\nI was the Lead Senior Watson for this audit and placed 1st with 1 solo medium\nfinding.\n\n * [Placed 1st in Reserve Protocol's Code4rena audit](https://code4rena.com/contests/2023-01-reserve-contest#top)\n\nIn this audit, I found 1 high severity vulnerability and 6 mediums, 1 of which\nwas a solo finding.\n\n## Available for hire\n\nI specialize in auditing Smart Contracts on EVM-based chains. 
For inquiries\nand bookings, please contact me.\n\n[Get a quote](https://code4rena.typeform.com/solo-\naudit#warden=HollaDieWaldfee&source=HollaDieWaldfee-profile)\n\n * [Twitter](https://twitter.com/HollaWaldfee100)\n * [Audit Portfolio](https://github.com/HollaDieWaldfee100/audits)\n\n## Helped Secure\n\n[](https://twitter.com/backed_xyz \"Backed\nProtocol\")[](https://twitter.com/TigrisTrades \"Tigris\nTrade\")[](https://twitter.com/reserveprotocol\n\"Reserve\")[](https://twitter.com/dripsnetwork \"Drips\nProtocol\")[](https://twitter.com/asymmetryfin \"Asymmetry\nFinance\")[](https://twitter.com/escherxyz\n\"Escher\")[](https://twitter.com/GoGoPool_\n\"GoGoPool\")[](https://twitter.com/rabbithole_gg \"RabbitHole\")\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/@IllIllI.json b/qa_bot/knowledge_base/c4/website/@IllIllI.json new file mode 100644 index 0000000..3ad611e --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@IllIllI.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@IllIllI", "md_content": "Skip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# IllIllI\n\nwarden\n\nOG Warden\n\nAvailable for Hire\n\n## Highlights\n\n * [Ranked #10 on Code4rena all-time leaderboard](https://code4rena.com/leaderboard)\n\nAs of June 2023, I'm currently ranked #10 on Code4rena's all-time leaderboard,\nhaving earned over $250k in awards.\n\n * [Ranked #1 on the Sherlock leaderboard](https://audits.sherlock.xyz/leaderboard)\n\nAs of June 2023, I'm currently ranked #1 on Sherlock's leaderboard, having\nearned 538 points and over $190k in awards.\n\n * [#1 bot race bot](https://www.botracer.xyz/leaderboard)\n\nTop bot in most bot races 
(e.g.\nhttps://gist.github.com/liveactionllama/27513952718ec3cbcf9de0fda7fef49c)\n\n## IllIllI's Activity\n\nThe Activity Stream is currently in beta. [Share feedback here](/help).\n\n## Available for hire\n\nI'm available for in-depth audits, or much less expensive bot-race-style\naudits\n\n[Get a quote](https://code4rena.typeform.com/solo-\naudit#warden=IllIllI&source=IllIllI-profile)\n\n## Helped Secure\n\n[](https://twitter.com/jpegd_69 \"JPEG'd\")[](https://twitter.com/SkaleNetwork\n\"SKALE\")[](https://twitter.com/foundation\n\"Foundation\")[](https://twitter.com/ConcurFinance \"Concur\nFinance\")[](https://twitter.com/HubbleExchange\n\"Hubble\")[](https://twitter.com/NestedFinance \"Nested\nFinance\")[](https://twitter.com/Paladin_vote\n\"Paladin\")[](https://twitter.com/biconomy\n\"Biconomy\")[](https://twitter.com/MIM_Spell \"Abracadabra\nMoney\")[](https://twitter.com/bunker_finance\n\"bunker.finance\")[](https://twitter.com/SturdyFinance\n\"Sturdy\")[](https://twitter.com/joynxyz \"Joyn\")[](https://twitter.com/nibblnft\n\"Nibbl\")[](https://twitter.com/mimo_labs \"Mimo\nDeFi\")[](https://twitter.com/callyfinance\n\"Cally\")[](https://twitter.com/opensea\n\"OpenSea\")[](https://twitter.com/BadgerDAO\n\"BadgerDAO\")[](https://twitter.com/velodromefi \"Velodrome\nFinance\")[](https://twitter.com/CUDOS_\n\"Cudos\")[](https://twitter.com/veTokenFinance \"veToken\nFinance\")[](https://twitter.com/nounsdao \"Nouns\nDAO\")[](https://twitter.com/fraxfinance \"Frax\nFinance\")[](https://twitter.com/artgobblers \"Art\nGobblers\")[](https://twitter.com/blockswap_team \"Stakehouse\nProtocol\")[](https://twitter.com/ParaSpace_NFT\n\"ParaSpace\")[](https://twitter.com/GoGoPool_\n\"GoGoPool\")[](https://twitter.com/Popcorn_DAO \"Popcorn\")[](https://maiadao.io\n\"Maia DAO 
Ecosystem\")[](https://twitter.com/NotionalFinance\n\"Notional\")[](https://twitter.com/yield\n\"Yield\")[](https://twitter.com/BehodlerAMM\n\"Behodler\")[](https://twitter.com/OpenLeverage\n\"OpenLeverage\")[](https://twitter.com/PoolTogether_\n\"PoolTogether\")[](https://twitter.com/redactedcartel \"Redacted\nCartel\")[](https://twitter.com/feiprotocol\n\"Tribe\")[](https://twitter.com/jcam_official \"JPYC\")[](https://maple.finance/\n\"Maple Finance\")[](https://twitter.com/prepo_io\n\"prePO\")[](https://twitter.com/backed_xyz \"Backed\nProtocol\")[](https://twitter.com/lifiprotocol\n\"LI.FI\")[](https://twitter.com/RollaFinance\n\"Rolla\")[](https://twitter.com/dualityfi \"Duality\nFocus\")[](https://twitter.com/axelarcore \"Axelar\nNetwork\")[](https://twitter.com/TimeswapLabs\n\"Timeswap\")[](https://twitter.com/backdfund\n\"Backd\")[](https://twitter.com/LensProtocol \"Aave\nLens\")[](https://twitter.com/VoltProtocol \"Volt\nProtocol\")[](https://twitter.com/sublimefinance\n\"Sublime\")[](https://twitter.com/phuture_finance \"Phuture\nFinance\")[](https://twitter.com/EnsoFinance \"Enso\nFinance\")[](https://twitter.com/AlchemixFi\n\"Alchemix\")[](https://twitter.com/FactDAO/\n\"FactoryDAO\")[](https://twitter.com/forgottenrunes \"Forgotten\nRunes\")[](https://twitter.com/rubicondefi\n\"Rubicon\")[](https://twitter.com/aurafinance \"Aura\nFinance\")[](https://twitter.com/infinitydotxyz \"Infinity NFT\nMarketplace\")[](https://twitter.com/puttyfi\n\"Putty\")[](https://twitter.com/IlluminateFi\n\"Illuminate\")[](https://twitter.com/ookitrade\n\"Ooki\")[](https://twitter.com/juiceboxETH\n\"Juicebox\")[](https://twitter.com/ConnextNetwork\n\"Connext\")[](https://twitter.com/Rigor_HQ \"Rigor\nProtocol\")[](https://twitter.com/shapeshift\n\"Yieldy\")[](https://twitter.com/fractional_art\n\"Fractional\")[](https://twitter.com/fiatdao \"FIAT\nDAO\")[](https://twitter.com/olympusdao 
\"Olympus\nDAO\")[](https://twitter.com/ensdomains\n\"ENS\")[](https://twitter.com/anchor_protocol\n\"Anchor\")[](https://twitter.com/vtvlco\n\"VTVL\")[](https://twitter.com/QuickswapDEX \"QuickSwap and\nStellaSwap\")[](https://twitter.com/golom_io\n\"Golom\")[](https://twitter.com/graphprotocol \"The\nGraph\")[](https://twitter.com/blur_io \"Blur\nExchange\")[](https://twitter.com/traderjoe_xyz \"Trader\nJoe\")[](https://twitter.com/zksync \"zkSync\")[](https://twitter.com/chainlink\n\"Chainlink\")[](https://twitter.com/debtdao \"Debt\nDAO\")[](https://twitter.com/looksrare\n\"LooksRare\")[](https://twitter.com/caviarAMM\n\"Caviar\")[](https://twitter.com/forgeries_io \"Forgeries\")\n\n[](https://twitter.com/TigrisTrades \"Tigris\nTrade\")[](https://twitter.com/OndoFinance \"Ondo\nFinance\")[](https://twitter.com/reserveprotocol\n\"Reserve\")[](https://twitter.com/numoen\n\"Numoen\")[](https://twitter.com/rabbithole_gg\n\"RabbitHole\")[](https://twitter.com/AstariaXYZ\n\"Astaria\")[](https://twitter.com/dripsnetwork \"Drips Protocol\")\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/@Jeiwan.json b/qa_bot/knowledge_base/c4/website/@Jeiwan.json new file mode 100644 index 0000000..ec72d4c --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@Jeiwan.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@Jeiwan", "md_content": "Skip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect 
MetaMaskConnect WalletConnectLog in[Register](/register)\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n# Jeiwan\n\nwarden\n\nOG Warden\n\nAvailable for Hire\n\nCEO of Unemployed.\n\n## Highlights\n\n * Placed 1st in Papr by Backed Code4rena audit\n\n * Ranked #38 on Code4rena all-time leaderboard\n\nAs of June 2023, I'm currently ranked #38 on Code4rena's all-time leaderboard,\nhaving earned $87k+ in awards.\n\n * Placed in top 5 for 7 Code4rena audits\n\nProjects I helped secure include Canto, Illuminate, Inverse, Trader Joe,\nAstaria, and more.\n\n## Jeiwan's Activity\n\nThe Activity Stream is currently in beta. [Share feedback here](/help).\n\n## Available for hire\n\n[Get a quote](https://code4rena.typeform.com/solo-\naudit#warden=Jeiwan&source=Jeiwan-profile)\n\n * [Website](https://jeiwan.net/)\n * [Twitter](https://twitter.com/jeiwan7)\n * [Github](https://github.com/jeiwan)\n\n## Helped Secure\n\n[](https://twitter.com/PoolTogether_\n\"PoolTogether\")[](https://twitter.com/QuickswapDEX \"QuickSwap and\nStellaSwap\")[](https://twitter.com/chainlink\n\"Chainlink\")[](https://twitter.com/juiceboxETH\n\"Juicebox\")[](https://twitter.com/redactedcartel \"Redacted\nCartel\")[](https://twitter.com/GoGoPool_\n\"GoGoPool\")[](https://twitter.com/AstariaXYZ \"Astaria\")\n\n[](https://twitter.com/AngleProtocol \"Angle\nProtocol\")[](https://twitter.com/goodentrylabs \"Good\nEntry\")[](https://twitter.com/nounsdao \"Nouns\nDAO\")[](https://twitter.com/olympusdao \"Olympus\nDAO\")[](https://twitter.com/nounsbuilder 
\"Nouns\nBuilder\")[](https://twitter.com/prtyDAO\n\"PartyDAO\")[](https://twitter.com/feiprotocol\n\"Tribe\")[](https://twitter.com/blur_io \"Blur\nExchange\")[](https://twitter.com/Y2kFinance \"Y2k\nFinance\")[](https://twitter.com/holographxyz\n\"Holograph\")[](https://twitter.com/Paladin_vote\n\"Paladin\")[](https://twitter.com/traderjoe_xyz \"Trader\nJoe\")[](https://twitter.com/InverseFinance \"Inverse\nFinance\")[](https://twitter.com/CantoPublic\n\"Canto\")[](https://twitter.com/debtdao \"Debt\nDAO\")[](https://twitter.com/blockswap_team \"Stakehouse\nProtocol\")[](https://twitter.com/backed_xyz \"Backed\nProtocol\")[](https://twitter.com/caviarAMM \"Caviar\")\n\n[](https://twitter.com/TigrisTrades \"Tigris\nTrade\")[](https://twitter.com/ParaSpace_NFT\n\"ParaSpace\")[](https://twitter.com/NeoTokyoCode \"Neo\nTokyo\")[](https://twitter.com/zksync \"zkSync\")\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/@Trust.json b/qa_bot/knowledge_base/c4/website/@Trust.json new file mode 100644 index 0000000..37d1b83 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@Trust.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@Trust", "md_content": "Skip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n# Trust\n\nwarden\n\nOG Warden\n\nDirect booking through Twitter / Trust Security website\n\n## Highlights\n\n * [#1 
Warden of the past year](https://app.auditcrew.xyz/code4rena/leaderboard)\n\n * [Trust Security founder and auditor](https://www.trust-security.xyz/)\n\nLeveraging the experience in the competitive security landscape in order to\nprovide the top-quality private auditing for our clients.\n\n * [Immunefi leaderboard](https://immunefi.com/leaderboard/)\n\nOne of the top hackers on Immunefi with $250k winnings and dozens of paid\nhigh-severity findings.\n\n * [Led Optimism Sherlock contest w/ Zach Obront](https://app.sherlock.xyz/audits/contests/38)\n\nWe achieved 1st place and received $213k in rewards.\n\n## Trust's Activity\n\nThe Activity Stream is currently in beta. [Share feedback here](/help).\n\n## About\n\nTop eco-system whitehat determined to make Web3 safe for the average adopter.\n\n * [Twitter](https://twitter.com/trust__90)\n * [Website](https://www.trust-security.xyz/)\n\n## Helped Secure\n\n[](https://twitter.com/blockswap_team \"Stakehouse\nProtocol\")[](https://twitter.com/blur_io \"Blur\nExchange\")[](https://twitter.com/traderjoe_xyz \"Trader\nJoe\")[](https://twitter.com/holographxyz\n\"Holograph\")[](https://twitter.com/prepo_io\n\"prePO\")[](https://twitter.com/ParaSpace_NFT\n\"ParaSpace\")[](https://twitter.com/basinexchange\n\"Basin\")[](https://twitter.com/ensdomains\n\"ENS\")[](https://twitter.com/chainlink\n\"Chainlink\")[](https://twitter.com/olympusdao \"Olympus\nDAO\")[](https://twitter.com/prtyDAO\n\"PartyDAO\")[](https://twitter.com/fraxfinance \"Frax\nFinance\")[](https://twitter.com/vtvlco\n\"VTVL\")[](https://twitter.com/QuickswapDEX \"QuickSwap and\nStellaSwap\")[](https://twitter.com/graphprotocol \"The\nGraph\")[](https://twitter.com/sizemarkets\n\"SIZE\")[](https://twitter.com/juiceboxETH\n\"Juicebox\")[](https://twitter.com/Paladin_vote\n\"Paladin\")[](https://twitter.com/debtdao \"Debt\nDAO\")[](https://twitter.com/forgeries_io \"Forgeries\")\n\n"} \ No newline at end of file diff --git 
a/qa_bot/knowledge_base/c4/website/@akshaysrivastav.json b/qa_bot/knowledge_base/c4/website/@akshaysrivastav.json new file mode 100644 index 0000000..49ba292 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@akshaysrivastav.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@akshaysrivastav", "md_content": "Skip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# AkshaySrivastav\n\nwarden\n\nOG Warden\n\nAvailable for Hire\n\nIndependent Security Researcher | Smart Contract Auditor | Top warden\n[@code4rena](https://twitter.com/code4rena)\n\n## Highlights\n\n * [Placed 1st in Code4rena Ondo Finance audit](https://github.com/code-423n4/2023-01-ondo-findings/blob/main/report.md#m-02-first-deposit-bug)\n\nIn this audit, I found 3 medium severity vulnerabilities, 1 of which was a\nsolo finding.\n\n * [Placed 2nd in Code4rena Chainlink audit](https://code4rena.com/contests/2023-05-chainlink-cross-chain-services-ccip-and-arm-network#top)\n\nIn this audit, I found 1 High severity vulnerability and 1 Medium severity\nvulnerability\n\n * [Placed 5th in Code4rena Aragon audit](https://github.com/code-423n4/2023-03-aragon-findings/issues/129)\n\nIn this audit, I found 1 medium severity vulnerability.\n\n## AkshaySrivastav's Activity\n\nThe Activity Stream is currently in beta. 
[Share feedback here](/help).\n\n## Available for hire\n\n[Get a quote](https://code4rena.typeform.com/solo-\naudit#warden=AkshaySrivastav&source=AkshaySrivastav-profile)\n\n## About\n\nAkshay is an independent security researcher who has found numerous critical\nbugs in DeFi projects which could have resulted in major loss to the protocol\nand its users.\n\n * [Twitter](https://twitter.com/akshaysrivastv)\n * [Website](https://www.akshaysrivastav.com/)\n\n## Helped Secure\n\n[](https://twitter.com/arbitrum \"Arbitrum\nFoundation\")[](https://twitter.com/escherxyz\n\"Escher\")[](https://twitter.com/ensdomains\n\"ENS\")[](https://twitter.com/chainlink\n\"Chainlink\")[](https://twitter.com/asymmetryfin \"Asymmetry\nFinance\")[](https://twitter.com/vtvlco\n\"VTVL\")[](https://twitter.com/CantoPublic\n\"Canto\")[](https://twitter.com/PoolTogether_\n\"PoolTogether\")[](https://twitter.com/OndoFinance \"Ondo\nFinance\")[](https://twitter.com/GoGoPool_\n\"GoGoPool\")[](https://twitter.com/reserveprotocol\n\"Reserve\")[](https://twitter.com/rabbithole_gg\n\"RabbitHole\")[](https://twitter.com/EthosReserve \"Ethos\nReserve\")[](https://twitter.com/AragonProject \"Aragon\nProtocol\")[](https://twitter.com/caviarAMM \"Caviar\")\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/@bin2chen.json b/qa_bot/knowledge_base/c4/website/@bin2chen.json new file mode 100644 index 0000000..aedd927 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@bin2chen.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@bin2chen", "md_content": "Skip Navigation\n\n[](/)\n\n[How it 
works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# bin2chen\n\nwarden\n\nOG Warden\n\nAvailable for Hire\n\nFocus Web3 Security | Solidity smart contracts\n\n## Highlights\n\n * Ranked #32 on Code4rena all-time leaderboard\n\nAs of June 2023, I'm currently ranked #32 on Code4rena's all-time leaderboard,\nhaving earned $99k+ in awards.\n\n * Ranked #5 on Code4rena 2023 leaderboard\n\nAs of June 2023, I'm currently ranked #5 on Code4rena's 2023 leaderboard.\n\n * Placed 1st in Code4rena Escher audit\n\nIn this audit, I found 3 high and 2 medium severity vulnerabilities.\n\n## bin2chen's Activity\n\nThe Activity Stream is currently in beta. [Share feedback here](/help).\n\n## Available for hire\n\n[Get a quote](https://code4rena.typeform.com/solo-\naudit#warden=bin2chen&source=bin2chen-profile)\n\n * [Twitter](https://twitter.com/bin2chen)\n\n## Helped Secure\n\n[](https://twitter.com/PoolTogether_\n\"PoolTogether\")[](https://twitter.com/arcade_xyz\n\"Arcade.xyz\")[](https://twitter.com/xETH_Dev\n\"xETH\")[](https://twitter.com/fiatdao \"FIAT\nDAO\")[](https://twitter.com/Y2kFinance \"Y2k\nFinance\")[](https://twitter.com/blur_io \"Blur\nExchange\")[](https://twitter.com/Popcorn_DAO\n\"Popcorn\")[](https://twitter.com/rabbithole_gg\n\"RabbitHole\")[](https://twitter.com/PolynomialFi \"Polynomial\nProtocol\")[](https://twitter.com/frankencoinzchf\n\"Frankencoin\")[](https://twitter.com/eigenlayer\n\"EigenLayer\")[](https://twitter.com/venusprotocol \"Venus\nProtocol\")[](https://twitter.com/MoonwellDeFi\n\"Moonwell\")[](https://twitter.com/ensdomains\n\"ENS\")[](https://twitter.com/CantoPublic\n\"Canto\")[](https://twitter.com/rubicondefi\n\"Rubicon\")[](https://twitter.com/staderlabs_eth \"Stader\nLabs\")[](https://twitter.com/chainlink 
\"Chainlink\")[](https://maiadao.io \"Maia\nDAO Ecosystem\")[](https://twitter.com/asymmetryfin \"Asymmetry\nFinance\")[](https://twitter.com/mimo_labs \"Mimo\nDeFi\")[](https://twitter.com/Rigor_HQ \"Rigor\nProtocol\")[](https://twitter.com/fractional_art\n\"Fractional\")[](https://twitter.com/SwivelFinance\n\"Swivel\")[](https://twitter.com/foundation\n\"Foundation\")[](https://twitter.com/nounsdao \"Nouns\nDAO\")[](https://twitter.com/olympusdao \"Olympus\nDAO\")[](https://twitter.com/nounsbuilder \"Nouns\nBuilder\")[](https://twitter.com/prtyDAO\n\"PartyDAO\")[](https://twitter.com/fraxfinance \"Frax\nFinance\")[](https://twitter.com/vtvlco \"VTVL\")[](https://twitter.com/golom_io\n\"Golom\")[](https://twitter.com/artgobblers \"Art\nGobblers\")[](https://twitter.com/3xcalibur69\n\"3xcalibur\")[](https://twitter.com/holographxyz\n\"Holograph\")[](https://twitter.com/sizemarkets\n\"SIZE\")[](https://twitter.com/Paladin_vote\n\"Paladin\")[](https://twitter.com/InverseFinance \"Inverse\nFinance\")[](https://twitter.com/debtdao \"Debt\nDAO\")[](https://twitter.com/blockswap_team \"Stakehouse\nProtocol\")[](https://twitter.com/redactedcartel \"Redacted\nCartel\")[](https://twitter.com/escherxyz\n\"Escher\")[](https://twitter.com/prepo_io\n\"prePO\")[](https://twitter.com/backed_xyz \"Backed\nProtocol\")[](https://twitter.com/forgeries_io\n\"Forgeries\")[](https://twitter.com/TigrisTrades \"Tigris\nTrade\")[](https://twitter.com/OndoFinance \"Ondo\nFinance\")[](https://twitter.com/GoGoPool_\n\"GoGoPool\")[](https://twitter.com/biconomy\n\"Biconomy\")[](https://twitter.com/AstariaXYZ\n\"Astaria\")[](https://twitter.com/wenwincom\n\"Wenwin\")[](https://twitter.com/EthosReserve \"Ethos\nReserve\")[](https://twitter.com/zksync\n\"zkSync\")[](https://twitter.com/CantoNamespace \"Canto Identity\nSubprotocols\")[](https://twitter.com/caviarAMM \"Caviar\")\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * 
[GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/@cccz.json b/qa_bot/knowledge_base/c4/website/@cccz.json new file mode 100644 index 0000000..0d85eb8 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@cccz.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@cccz", "md_content": "Skip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n# cccz\n\nwarden\n\nOG Warden\n\nAvailable for Hire\n\nI am an auditor who continues to learn.\n\n## Highlights\n\n * Ranked #9 on Code4rena all-time leaderboard\n\nAs of June 2023, I'm currently ranked #9 on Code4rena's all-time leaderboard,\nwith $280k+ in awards.\n\n * Placed in the top 5 in over 20 Code4rena audits\n\nProjects audited included NFTx, Maple Finance, LooksRare, PoolTogether, and\nmore.\n\n## cccz's Activity\n\nThe Activity Stream is currently in beta. 
[Share feedback here](/help).\n\n## Available for hire\n\n[Get a quote](https://code4rena.typeform.com/solo-\naudit#warden=cccz&source=cccz-profile)\n\n * [Twitter](https://twitter.com/hellocccz)\n\n## Helped Secure\n\n[](https://twitter.com/rubicondefi\n\"Rubicon\")[](https://twitter.com/SandclockOrg \"Sandclock\")[](https://nftx.org/\n\"NFTX\")[](https://twitter.com/feiprotocol\n\"Tribe\")[](https://twitter.com/jpegd_69\n\"JPEG'd\")[](https://twitter.com/ConcurFinance \"Concur\nFinance\")[](https://twitter.com/lifiprotocol\n\"LI.FI\")[](https://twitter.com/phuture_finance \"Phuture\nFinance\")[](https://twitter.com/LybraFinanceLSD \"Lybra\nFinance\")[](https://twitter.com/forgottenrunes \"Forgotten\nRunes\")[](https://twitter.com/velodromefi \"Velodrome\nFinance\")[](https://twitter.com/CantoPublic\n\"Canto\")[](https://twitter.com/FactDAO/\n\"FactoryDAO\")[](https://twitter.com/ConnextNetwork\n\"Connext\")[](https://twitter.com/fiatdao \"FIAT\nDAO\")[](https://twitter.com/golom_io \"Golom\")[](https://twitter.com/blur_io\n\"Blur Exchange\")[](https://twitter.com/redactedcartel \"Redacted\nCartel\")[](https://twitter.com/GoGoPool_\n\"GoGoPool\")[](https://twitter.com/OndoFinance \"Ondo\nFinance\")[](https://twitter.com/TigrisTrades \"Tigris\nTrade\")[](https://twitter.com/AstariaXYZ\n\"Astaria\")[](https://twitter.com/rabbithole_gg\n\"RabbitHole\")[](https://twitter.com/frankencoinzchf\n\"Frankencoin\")[](https://twitter.com/chainlink\n\"Chainlink\")[](https://twitter.com/nounsdao \"Nouns\nDAO\")[](https://twitter.com/yetifinance \"Yeti\nFinance\")[](https://twitter.com/TimeswapLabs\n\"Timeswap\")[](https://twitter.com/xdefi_wallet\n\"XDEFI\")[](https://twitter.com/insuredao\n\"InsureDAO\")[](https://twitter.com/Livepeer\n\"Livepeer\")[](https://twitter.com/NotionalFinance\n\"Notional\")[](https://elasticswap.org/\n\"ElasticSwap\")[](https://twitter.com/yield\n\"Yield\")[](https://twitter.com/traderjoe_xyz 
\"Trader\nJoe\")[](https://twitter.com/BehodlerAMM\n\"Behodler\")[](https://twitter.com/OpenLeverage\n\"OpenLeverage\")[](https://twitter.com/VaderProtocol \"Vader\nProtocol\")[](https://twitter.com/BadgerDAO\n\"BadgerDAO\")[](https://twitter.com/jcam_official\n\"JPYC\")[](https://twitter.com/foundation\n\"Foundation\")[](https://twitter.com/HubbleExchange\n\"Hubble\")[](https://maple.finance/ \"Maple\nFinance\")[](https://twitter.com/sherlockdefi\n\"Sherlock\")[](https://twitter.com/Paladin_vote\n\"Paladin\")[](https://twitter.com/prepo_io\n\"prePO\")[](https://twitter.com/backed_xyz \"Backed\nProtocol\")[](https://twitter.com/RollaFinance\n\"Rolla\")[](https://twitter.com/dualityfi \"Duality\nFocus\")[](https://twitter.com/axelarcore \"Axelar\nNetwork\")[](https://twitter.com/backdfund\n\"Backd\")[](https://twitter.com/LensProtocol \"Aave\nLens\")[](https://twitter.com/VoltProtocol \"Volt\nProtocol\")[](https://twitter.com/PoolTogether_\n\"PoolTogether\")[](https://twitter.com/bunker_finance\n\"bunker.finance\")[](https://twitter.com/MIM_Spell \"Abracadabra\nMoney\")[](https://twitter.com/biconomy\n\"Biconomy\")[](https://twitter.com/EnsoFinance \"Enso\nFinance\")[](https://twitter.com/SturdyFinance\n\"Sturdy\")[](https://twitter.com/SkaleNetwork\n\"SKALE\")[](https://twitter.com/mimo_labs \"Mimo\nDeFi\")[](https://twitter.com/joynxyz\n\"Joyn\")[](https://twitter.com/callyfinance\n\"Cally\")[](https://twitter.com/CUDOS_\n\"Cudos\")[](https://twitter.com/AlchemixFi\n\"Alchemix\")[](https://twitter.com/nibblnft\n\"Nibbl\")[](https://twitter.com/aurafinance \"Aura\nFinance\")[](https://twitter.com/infinitydotxyz \"Infinity NFT\nMarketplace\")[](https://twitter.com/opensea\n\"OpenSea\")[](https://twitter.com/puttyfi\n\"Putty\")[](https://twitter.com/IlluminateFi\n\"Illuminate\")[](https://twitter.com/veTokenFinance \"veToken\nFinance\")[](https://twitter.com/ookitrade\n\"Ooki\")[](https://twitter.com/NestedFinance 
\"Nested\nFinance\")[](https://twitter.com/juiceboxETH\n\"Juicebox\")[](https://twitter.com/Rigor_HQ \"Rigor\nProtocol\")[](https://twitter.com/shapeshift\n\"Yieldy\")[](https://twitter.com/fractional_art\n\"Fractional\")[](https://twitter.com/SwivelFinance\n\"Swivel\")[](https://twitter.com/olympusdao \"Olympus\nDAO\")[](https://twitter.com/nounsbuilder \"Nouns\nBuilder\")[](https://twitter.com/ensdomains\n\"ENS\")[](https://twitter.com/prtyDAO\n\"PartyDAO\")[](https://twitter.com/anchor_protocol\n\"Anchor\")[](https://twitter.com/fraxfinance \"Frax\nFinance\")[](https://twitter.com/vtvlco\n\"VTVL\")[](https://twitter.com/QuickswapDEX \"QuickSwap and\nStellaSwap\")[](https://twitter.com/artgobblers \"Art\nGobblers\")[](https://twitter.com/graphprotocol \"The\nGraph\")[](https://twitter.com/Y2kFinance \"Y2k\nFinance\")[](https://twitter.com/3xcalibur69\n\"3xcalibur\")[](https://twitter.com/holographxyz\n\"Holograph\")[](https://twitter.com/sizemarkets\n\"SIZE\")[](https://twitter.com/InverseFinance \"Inverse\nFinance\")[](https://twitter.com/zksync \"zkSync\")[](https://twitter.com/debtdao\n\"Debt DAO\")[](https://twitter.com/looksrare\n\"LooksRare\")[](https://twitter.com/blockswap_team \"Stakehouse\nProtocol\")[](https://twitter.com/escherxyz\n\"Escher\")[](https://twitter.com/caviarAMM\n\"Caviar\")[](https://twitter.com/forgeries_io \"Forgeries\")\n\n[](https://twitter.com/ParaSpace_NFT\n\"ParaSpace\")[](https://twitter.com/Popcorn_DAO\n\"Popcorn\")[](https://twitter.com/NeoTokyoCode \"Neo\nTokyo\")[](https://twitter.com/CantoNamespace \"Canto Identity Subprotocols\")\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/@cmichel.json b/qa_bot/knowledge_base/c4/website/@cmichel.json new file mode 100644 index 0000000..79da34f --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@cmichel.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@cmichel", "md_content": "Skip Navigation\n\n[](/)\n\n[How it 
works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# cmichel\n\nwarden\n\nOG Warden\n\nAvailable for Hire\n\nI'm a **security researcher \ud83c\udff9\ud83d\udc1b**, **developer \ud83d\udc68\u200d\ud83d\udcbb**, and **author \u270d\ufe0f**. For\nthe past 3 years, I've been focusing on smart contract security audits.\n\n## Highlights\n\n * [Ranked #1 all-time on Code4rena leaderboard](https://c4stage.code4rena.com/leaderboard)\n\nAs of June 2023, I'm ranked the #1 security auditor on Code4rena's all-time\nleaderboard with $1.3M in earnings.\n\n## cmichel's Activity\n\nThe Activity Stream is currently in beta. [Share feedback here](/help).\n\n## Available for hire\n\n[Get a quote](https://code4rena.typeform.com/solo-\naudit#warden=cmichel&source=cmichel-profile)\n\n * [Twitter](https://twitter.com/cmichelio)\n * [Blog](https://cmichel.io/)\n\n## Helped Secure\n\n[](https://twitter.com/VaderProtocol \"Vader\nProtocol\")[](https://twitter.com/yield\n\"Yield\")[](https://twitter.com/PoolTogether_\n\"PoolTogether\")[](https://twitter.com/BadgerDAO\n\"BadgerDAO\")[](https://twitter.com/realitycards \"Reality\nCards\")[](https://twitter.com/SpartanProtocol \"Spartan\nProtocol\")[](https://marginswap.finance/ \"Marginswap\")[](https://nftx.org/\n\"NFTX\")[](https://twitter.com/NotionalFinance\n\"Notional\")[](https://twitter.com/SushiSwap \"Sushi\")[](https://maple.finance/\n\"Maple Finance\")[](https://twitter.com/yetifinance \"Yeti\nFinance\")[](https://twitter.com/Amun\n\"Amun\")[](https://twitter.com/sublimefinance\n\"Sublime\")[](https://twitter.com/TallyCash\n\"Tally\")[](https://twitter.com/TracerDAO\n\"Tracer\")[](https://twitter.com/NestedFinance 
\"Nested\nFinance\")[](https://twitter.com/KuiperFinance\n\"Kuiper\")[](https://twitter.com/MochiDeFi\n\"Mochi\")[](https://twitter.com/LensProtocol \"Aave\nLens\")[](https://twitter.com/SkaleNetwork\n\"SKALE\")[](https://twitter.com/traderjoe_xyz \"Trader\nJoe\")[](https://twitter.com/jpegd_69 \"JPEG'd\")[](https://elasticswap.org/\n\"ElasticSwap\")[](https://twitter.com/ConnextNetwork\n\"Connext\")[](https://twitter.com/SlingshotCrypto \"Slingshot\nFinance\")[](https://twitter.com/ElasticDAO\n\"ElasticDAO\")[](https://meebits.larvalabs.com/ \"LarvaLabs\nMeebits\")[](https://twitter.com/BasedProtocol \"Based\nLoans\")[](https://twitter.com/88mphapp\n\"88mph\")[](https://twitter.com/VisorFinance\n\"Visor\")[](https://twitter.com/FairsideNetwork\n\"FairSide\")[](https://twitter.com/groprotocol \"Gro\nProtocol\")[](https://twitter.com/wildcredit \"Wild\nCredit\")[](https://twitter.com/sherlockdefi\n\"Sherlock\")[](https://twitter.com/float_capital \"Float\nCapital\")[](https://twitter.com/AmbireWallet\n\"Ambire\")[](https://twitter.com/yaxis_project\n\"yAxis\")[](https://twitter.com/SwivelFinance\n\"Swivel\")[](https://twitter.com/tempusfinance \"Tempus\nFinance\")[](https://twitter.com/Covalent_HQ\n\"Covalent\")[](https://twitter.com/unionprotocol \"Union\nFinance\")[](https://twitter.com/feiprotocol \"Fei\nProtocol\")[](https://twitter.com/overlayprotocol \"Overlay\nProtocol\")[](https://twitter.com/mellowprotocol \"Mellow\nProtocol\")[](https://twitter.com/bootfinance \"Boot\nFinance\")[](https://twitter.com/perennial_fi\n\"Perennial\")[](https://twitter.com/UnlockProtocol \"Unlock Protocol\")\n\n[](https://twitter.com/TimeswapLabs\n\"Timeswap\")[](https://twitter.com/xdefi_wallet\n\"XDEFI\")[](https://twitter.com/SandclockOrg\n\"Sandclock\")[](https://twitter.com/MaltProtocol 
\"Malt\nFinance\")[](https://twitter.com/insuredao\n\"InsureDAO\")[](https://twitter.com/BehodlerAMM\n\"Behodler\")[](https://twitter.com/OpenLeverage\n\"OpenLeverage\")[](https://twitter.com/redactedcartel \"Redacted\nCartel\")[](https://twitter.com/feiprotocol\n\"Tribe\")[](https://twitter.com/jcam_official\n\"JPYC\")[](https://twitter.com/foundation\n\"Foundation\")[](https://twitter.com/HubbleExchange\n\"Hubble\")[](https://twitter.com/prepo_io\n\"prePO\")[](https://twitter.com/backed_xyz \"Backed\nProtocol\")[](https://twitter.com/dualityfi \"Duality\nFocus\")[](https://twitter.com/ConcurFinance \"Concur\nFinance\")[](https://twitter.com/VoltProtocol \"Volt\nProtocol\")[](https://twitter.com/biconomy\n\"Biconomy\")[](https://twitter.com/opensea\n\"OpenSea\")[](https://twitter.com/anchor_protocol \"Anchor\")\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/@gpersoon.json b/qa_bot/knowledge_base/c4/website/@gpersoon.json new file mode 100644 index 0000000..95ea942 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@gpersoon.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@gpersoon", "md_content": "Skip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * 
[Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n# gpersoon\n\nwarden\n\nOG Warden\n\nAvailable for Hire\n\nDoing all kinds of fun stuff with blockchains.\n\n## Highlights\n\n * yAcademy Guest Speaker\n\n * Identified critical risk in Aera codebase\n\nIdentified a critical bug stemming from another auditor's finding due to\nAera's dependency on Balancer.\n\n * Successfully disclosed an exploitable scenario on Balancer via Immunefi.\n\nOn May 14th 2022, I worked alongside another auditor to disclose an exploit to\nBalancer via Immunefi, receiving $50k as a reward. No user funds were lost as\na result.\n\n## gpersoon's Activity\n\nThe Activity Stream is currently in beta. [Share feedback here](/help).\n\n## Available for hire\n\n[Get a quote](https://code4rena.typeform.com/solo-\naudit#warden=gpersoon&source=gpersoon-profile)\n\n * [Twitter](https://twitter.com/gpersoon)\n * [Website](https://www.gpersoon.nl/)\n\n## Helped Secure\n\n[](https://twitter.com/TracerDAO \"Tracer\")[](https://twitter.com/realitycards\n\"Reality Cards\")[](https://marginswap.finance/\n\"Marginswap\")[](https://maple.finance/ \"Maple\nFinance\")[](https://twitter.com/yield\n\"Yield\")[](https://twitter.com/VisorFinance\n\"Visor\")[](https://twitter.com/NotionalFinance\n\"Notional\")[](https://twitter.com/KuiperFinance\n\"Kuiper\")[](https://twitter.com/Amun\n\"Amun\")[](https://twitter.com/MaltProtocol \"Malt\nFinance\")[](https://twitter.com/MochiDeFi \"Mochi\")\n\n[](https://twitter.com/yetifinance \"Yeti\nFinance\")[](https://twitter.com/lukso_io\n\"LUKSO\")[](https://twitter.com/SlingshotCrypto \"Slingshot\nFinance\")[](https://twitter.com/ElasticDAO\n\"ElasticDAO\")[](https://twitter.com/VaderProtocol \"Vader\nProtocol\")[](https://meebits.larvalabs.com/ 
\"LarvaLabs\nMeebits\")[](https://twitter.com/BasedProtocol \"Based\nLoans\")[](https://nftx.org/ \"NFTX\")[](https://twitter.com/88mphapp\n\"88mph\")[](https://twitter.com/FairsideNetwork\n\"FairSide\")[](https://twitter.com/PoolTogether_\n\"PoolTogether\")[](https://twitter.com/groprotocol \"Gro\nProtocol\")[](https://twitter.com/wildcredit \"Wild\nCredit\")[](https://twitter.com/ConnextNetwork\n\"Connext\")[](https://twitter.com/SpartanProtocol \"Spartan\nProtocol\")[](https://twitter.com/sherlockdefi\n\"Sherlock\")[](https://twitter.com/float_capital \"Float\nCapital\")[](https://twitter.com/AmbireWallet\n\"Ambire\")[](https://twitter.com/yaxis_project\n\"yAxis\")[](https://twitter.com/SwivelFinance\n\"Swivel\")[](https://twitter.com/SushiSwap\n\"Sushi\")[](https://twitter.com/tempusfinance \"Tempus\nFinance\")[](https://twitter.com/TallyCash\n\"Tally\")[](https://twitter.com/Covalent_HQ\n\"Covalent\")[](https://twitter.com/unionprotocol \"Union\nFinance\")[](https://twitter.com/NestedFinance \"Nested\nFinance\")[](https://twitter.com/BadgerDAO\n\"BadgerDAO\")[](https://twitter.com/overlayprotocol \"Overlay\nProtocol\")[](https://twitter.com/bootfinance \"Boot\nFinance\")[](https://twitter.com/sublimefinance\n\"Sublime\")[](https://twitter.com/xdefi_wallet \"XDEFI\")\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/@gzeon.json b/qa_bot/knowledge_base/c4/website/@gzeon.json new file mode 100644 index 0000000..b85a092 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@gzeon.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@gzeon", "md_content": "Skip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# gzeon\n\nwarden\n\nOG Warden\n\nAvailable for Hire\n\ndev 
[@offchainlabs](https://twitter.com/offchainlabs), initial builders of\n[@arbitrum](https://twitter.com/arbitrum) | judge\n[@code4rena](https://twitter.com/code4rena)\n\n## Highlights\n\n * Ranked #18 on Code4rena all-time leaderboard\n\nAs of June 2023, I'm currently ranked #18 on Code4rena's all-time leaderboard,\nwith $160k+ in awards.\n\n## gzeon's Activity\n\nThe Activity Stream is currently in beta. [Share feedback here](/help).\n\n## Available for hire\n\n[Get a quote](https://code4rena.typeform.com/solo-\naudit#warden=gzeon&source=gzeon-profile)\n\n * [Twitter](https://twitter.com/gzeon)\n * [Website](https://gzeon.dev/)\n * [Github](https://github.com/gzeoneth)\n\n## Helped Secure\n\n[](https://twitter.com/PoolTogether_\n\"PoolTogether\")[](https://twitter.com/VaderProtocol \"Vader\nProtocol\")[](https://twitter.com/insuredao\n\"InsureDAO\")[](https://twitter.com/Livepeer\n\"Livepeer\")[](https://twitter.com/NestedFinance \"Nested\nFinance\")[](https://twitter.com/BadgerDAO\n\"BadgerDAO\")[](https://twitter.com/sublimefinance\n\"Sublime\")[](https://twitter.com/feiprotocol\n\"Tribe\")[](https://twitter.com/OpenLeverage\n\"OpenLeverage\")[](https://twitter.com/KuiperFinance\n\"Kuiper\")[](https://twitter.com/ConcurFinance \"Concur\nFinance\")[](https://twitter.com/rubicondefi\n\"Rubicon\")[](https://twitter.com/forgottenrunes \"Forgotten\nRunes\")[](https://twitter.com/ookitrade\n\"Ooki\")[](https://twitter.com/anchor_protocol\n\"Anchor\")[](https://twitter.com/CantoPublic\n\"Canto\")[](https://twitter.com/TigrisTrades \"Tigris\nTrade\")[](https://twitter.com/MochiDeFi\n\"Mochi\")[](https://twitter.com/FairsideNetwork\n\"FairSide\")[](https://twitter.com/feiprotocol \"Fei\nProtocol\")[](https://maple.finance/ \"Maple\nFinance\")[](https://twitter.com/overlayprotocol \"Overlay\nProtocol\")[](https://twitter.com/yaxis_project\n\"yAxis\")[](https://twitter.com/mellowprotocol \"Mellow\nProtocol\")[](https://twitter.com/bootfinance 
\"Boot\nFinance\")[](https://twitter.com/perennial_fi\n\"Perennial\")[](https://twitter.com/UnlockProtocol \"Unlock Protocol\")\n\n[](https://twitter.com/yetifinance \"Yeti Finance\")[](https://twitter.com/Amun\n\"Amun\")[](https://twitter.com/TimeswapLabs\n\"Timeswap\")[](https://twitter.com/SandclockOrg\n\"Sandclock\")[](https://twitter.com/MaltProtocol \"Malt\nFinance\")[](https://twitter.com/NotionalFinance\n\"Notional\")[](https://elasticswap.org/\n\"ElasticSwap\")[](https://twitter.com/traderjoe_xyz \"Trader\nJoe\")[](https://twitter.com/BehodlerAMM \"Behodler\")[](https://nftx.org/\n\"NFTX\")[](https://twitter.com/redactedcartel \"Redacted\nCartel\")[](https://twitter.com/jcam_official\n\"JPYC\")[](https://twitter.com/foundation\n\"Foundation\")[](https://twitter.com/HubbleExchange\n\"Hubble\")[](https://twitter.com/sherlockdefi\n\"Sherlock\")[](https://twitter.com/Paladin_vote\n\"Paladin\")[](https://twitter.com/RollaFinance\n\"Rolla\")[](https://twitter.com/LensProtocol \"Aave\nLens\")[](https://twitter.com/phuture_finance \"Phuture\nFinance\")[](https://twitter.com/MIM_Spell \"Abracadabra\nMoney\")[](https://twitter.com/biconomy\n\"Biconomy\")[](https://twitter.com/SkaleNetwork\n\"SKALE\")[](https://twitter.com/callyfinance\n\"Cally\")[](https://twitter.com/CUDOS_ \"Cudos\")[](https://twitter.com/FactDAO/\n\"FactoryDAO\")[](https://twitter.com/backdfund\n\"Backd\")[](https://twitter.com/velodromefi \"Velodrome\nFinance\")[](https://twitter.com/opensea\n\"OpenSea\")[](https://twitter.com/veTokenFinance \"veToken\nFinance\")[](https://twitter.com/fraxfinance \"Frax\nFinance\")[](https://twitter.com/escherxyz\n\"Escher\")[](https://twitter.com/caviarAMM \"Caviar\")\n\n[](https://twitter.com/ParaSpace_NFT\n\"ParaSpace\")[](https://twitter.com/OndoFinance \"Ondo\nFinance\")[](https://twitter.com/rabbithole_gg\n\"RabbitHole\")[](https://twitter.com/CantoIdentity \"Canto Identity Protocol\")\n\n * An open organization\n * 
[Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/@hansfriese.json b/qa_bot/knowledge_base/c4/website/@hansfriese.json new file mode 100644 index 0000000..7cab9a8 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@hansfriese.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@hansfriese", "md_content": "Skip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# hansfriese\n\nwarden\n\nOG Warden\n\nAvailable for Hire\n\nFounder [@SoloditOfficial](https://twitter.com/SoloditOfficial) | Co-founder,\nSecurity Researcher [@CyfrinAudits](https://twitter.com/CyfrinAudits) | Judge\nat [@code4rena](https://twitter.com/code4rena)\n\n## Highlights\n\n * Founded Solodit\n\nSolodit acts as an aggregator for audit reports across multiple platforms. At\npresent, it collects reports from Code4rena, Sherlock, Spearbit, Halborn,\nOpenZeppelin, Consensys, and Trail of Bits.\n\n * Currently ranked #1 on Code4rena 2023 Leaderboard\n\nAs of June 2023, I'm currently ranked #1 on Code4rena's 2023 leaderboard, with\n$75k+ in awards.\n\n## hansfriese's Activity\n\nThe Activity Stream is currently in beta. 
[Share feedback here](/help).\n\n## Available for hire\n\n[Get a quote](https://code4rena.typeform.com/solo-\naudit#warden=hansfriese&source=hansfriese-profile)\n\n * [Twitter](https://twitter.com/hansfriese)\n * [Solodit](https://solodit.xyz/)\n\n## Helped Secure\n\n[](https://twitter.com/bunker_finance\n\"bunker.finance\")[](https://twitter.com/rubicondefi\n\"Rubicon\")[](https://twitter.com/yield\n\"Yield\")[](https://twitter.com/olympusdao \"Olympus\nDAO\")[](https://twitter.com/Rigor_HQ \"Rigor\nProtocol\")[](https://twitter.com/shapeshift\n\"Yieldy\")[](https://twitter.com/golom_io\n\"Golom\")[](https://twitter.com/debtdao \"Debt DAO\")\n\n[](https://twitter.com/escherxyz \"Escher\")[](https://twitter.com/GoGoPool_\n\"GoGoPool\")[](https://twitter.com/biconomy\n\"Biconomy\")[](https://twitter.com/Popcorn_DAO\n\"Popcorn\")[](https://twitter.com/numoen\n\"Numoen\")[](https://twitter.com/rabbithole_gg\n\"RabbitHole\")[](https://twitter.com/EnsoFinance \"Enso\nFinance\")[](https://twitter.com/SturdyFinance\n\"Sturdy\")[](https://twitter.com/callyfinance\n\"Cally\")[](https://twitter.com/CUDOS_\n\"Cudos\")[](https://twitter.com/AlchemixFi\n\"Alchemix\")[](https://twitter.com/FactDAO/\n\"FactoryDAO\")[](https://twitter.com/forgottenrunes \"Forgotten\nRunes\")[](https://twitter.com/NotionalFinance\n\"Notional\")[](https://twitter.com/backdfund\n\"Backd\")[](https://twitter.com/nibblnft\n\"Nibbl\")[](https://twitter.com/aurafinance \"Aura\nFinance\")[](https://twitter.com/velodromefi \"Velodrome\nFinance\")[](https://twitter.com/infinitydotxyz \"Infinity NFT\nMarketplace\")[](https://twitter.com/puttyfi\n\"Putty\")[](https://twitter.com/IlluminateFi\n\"Illuminate\")[](https://twitter.com/veTokenFinance \"veToken\nFinance\")[](https://twitter.com/NestedFinance 
\"Nested\nFinance\")[](https://twitter.com/juiceboxETH\n\"Juicebox\")[](https://twitter.com/ConnextNetwork\n\"Connext\")[](https://twitter.com/CantoPublic\n\"Canto\")[](https://twitter.com/fractional_art\n\"Fractional\")[](https://twitter.com/SwivelFinance\n\"Swivel\")[](https://twitter.com/axelarcore \"Axelar\nNetwork\")[](https://twitter.com/nounsbuilder \"Nouns\nBuilder\")[](https://twitter.com/prtyDAO\n\"PartyDAO\")[](https://twitter.com/fraxfinance \"Frax\nFinance\")[](https://twitter.com/feiprotocol\n\"Tribe\")[](https://twitter.com/vtvlco\n\"VTVL\")[](https://twitter.com/artgobblers \"Art\nGobblers\")[](https://twitter.com/blur_io \"Blur\nExchange\")[](https://twitter.com/3xcalibur69\n\"3xcalibur\")[](https://twitter.com/holographxyz\n\"Holograph\")[](https://twitter.com/sizemarkets\n\"SIZE\")[](https://twitter.com/Paladin_vote\n\"Paladin\")[](https://twitter.com/traderjoe_xyz \"Trader\nJoe\")[](https://twitter.com/InverseFinance \"Inverse\nFinance\")[](https://twitter.com/redactedcartel \"Redacted\nCartel\")[](https://twitter.com/prepo_io\n\"prePO\")[](https://twitter.com/backed_xyz \"Backed\nProtocol\")[](https://twitter.com/caviarAMM\n\"Caviar\")[](https://twitter.com/forgeries_io\n\"Forgeries\")[](https://twitter.com/TigrisTrades \"Tigris\nTrade\")[](https://twitter.com/ParaSpace_NFT\n\"ParaSpace\")[](https://twitter.com/OndoFinance \"Ondo\nFinance\")[](https://twitter.com/TimeswapLabs\n\"Timeswap\")[](https://twitter.com/dripsnetwork \"Drips\nProtocol\")[](https://twitter.com/EthosReserve \"Ethos Reserve\")\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git 
a/qa_bot/knowledge_base/c4/website/@hickuphh3.json b/qa_bot/knowledge_base/c4/website/@hickuphh3.json new file mode 100644 index 0000000..1859f83 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@hickuphh3.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@hickuphh3", "md_content": "Skip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# hickuphh3\n\nwarden\n\nOG Warden\n\nAvailable for Hire\n\nIndependent Security Researcher\n\n## Highlights\n\n * [Ranked #20 on Code4rena all-time leaderboard](https://code4rena.com/leaderboard)\n\nAs of June 2023, I'm currently ranked #20 on the Code4rena all-time\nleaderboard, with $140k+ in awards.\n\n * [ETHTaipei Security Panel](https://code4rena.com/leaderboard)\n\nIn April 2023, I participated in a live panel at ETHTaipei on all things web3\nsecurity alongside representatives from Code4rena, Quantstamp, Trail of Bits,\nand more.\n\n * [Hosted community workshop on Clober](https://www.youtube.com/watch?v=9sbUCjfM6P4)\n\nAs part of my work with Spearbit, I hosted a community workshop that looked at\na security assessment of Clober DEX.\n\n## hickuphh3's Activity\n\nThe Activity Stream is currently in beta. [Share feedback here](/help).\n\n## Available for hire\n\n\\- Small codebases preferred (~2, max 3k sLOC) with assurance of other audit\nengagements \\- Preference for defi (DEXes, derivatives, NFT lending etc.)\n\n[Get a quote](https://code4rena.typeform.com/solo-\naudit#warden=hickuphh3&source=hickuphh3-profile)\n\n## About\n\nHi! I'm an independent security researcher since mid 2021, with smart contract\nexperience since 2018. 
Lived through crypto winter and 2020 defi summer.\nAlways curious, never ceasing in trying out new tooling & cutting edge-\nplatforms\n\n * [Twitter](https://twitter.com/HickupH)\n\n## Helped Secure\n\n[](https://twitter.com/PoolTogether_\n\"PoolTogether\")[](https://twitter.com/float_capital \"Float\nCapital\")[](https://twitter.com/SpartanProtocol \"Spartan\nProtocol\")[](https://twitter.com/SandclockOrg\n\"Sandclock\")[](https://twitter.com/redactedcartel \"Redacted\nCartel\")[](https://twitter.com/ConcurFinance \"Concur\nFinance\")[](https://twitter.com/CantoPublic\n\"Canto\")[](https://twitter.com/sherlockdefi\n\"Sherlock\")[](https://twitter.com/realitycards \"Reality\nCards\")[](https://twitter.com/yield\n\"Yield\")[](https://twitter.com/AltheaNetwork \"Althea Gravity\nBridge\")[](https://twitter.com/yaxis_project\n\"yAxis\")[](https://twitter.com/wildcredit \"Wild\nCredit\")[](https://twitter.com/BadgerDAO\n\"BadgerDAO\")[](https://twitter.com/SlingshotCrypto \"Slingshot\nFinance\")[](https://twitter.com/Covalent_HQ\n\"Covalent\")[](https://twitter.com/SushiSwap\n\"Sushi\")[](https://twitter.com/FairsideNetwork\n\"FairSide\")[](https://twitter.com/feiprotocol \"Fei\nProtocol\")[](https://twitter.com/jcam_official\n\"JPYC\")[](https://twitter.com/backed_xyz \"Backed\nProtocol\")[](https://twitter.com/lifiprotocol\n\"LI.FI\")[](https://twitter.com/jpegd_69\n\"JPEG'd\")[](https://twitter.com/sublimefinance\n\"Sublime\")[](https://twitter.com/biconomy\n\"Biconomy\")[](https://twitter.com/SturdyFinance\n\"Sturdy\")[](https://twitter.com/joynxyz\n\"Joyn\")[](https://twitter.com/callyfinance\n\"Cally\")[](https://twitter.com/FactDAO/\n\"FactoryDAO\")[](https://twitter.com/forgottenrunes \"Forgotten\nRunes\")[](https://twitter.com/opensea\n\"OpenSea\")[](https://twitter.com/anchor_protocol \"Anchor\")\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * 
[Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/@hyh.json b/qa_bot/knowledge_base/c4/website/@hyh.json new file mode 100644 index 0000000..92eeae4 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@hyh.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@hyh", "md_content": "Skip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n# hyh\n\nwarden\n\nOG Warden\n\nAvailable for Hire\n\nIndependent smart contracts security researcher, LSR at Spearbit\n\n## Highlights\n\n * [Ranked #4 on Code4rena all-time Leaderboard](https://twitter.com/0xhyh/status/1621272460577366022)\n\nAs of June 2023, I was ranked #4 on the Code4rena all-time leaderboard\n\n * [Led series of Union v2 and Ajna Finance security reviews on Sherlock](https://github.com/ajna-finance/audits)\n\nI led audits for Union v2 (reported 8 of 9 high severity issues found in 2\ncontests) and Ajna Finance (12 of 17 highs in 2 contests), both projects are\nnow live\n\n## hyh's Activity\n\nThe Activity Stream is currently in beta. 
[Share feedback here](/help).\n\n## Available for hire\n\nEVM\n\n[Get a quote](https://code4rena.typeform.com/solo-audit#warden=hyh&source=hyh-\nprofile)\n\n * [Twitter](https://twitter.com/0xhyh)\n\n## Helped Secure\n\n[](https://twitter.com/ajnafi \"Ajna\nProtocol\")[](https://twitter.com/mellowprotocol \"Mellow\nProtocol\")[](https://twitter.com/TimeswapLabs\n\"Timeswap\")[](https://twitter.com/MaltProtocol \"Malt Finance\")\n\n[](https://nftx.org/ \"NFTX\")[](https://twitter.com/HubbleExchange\n\"Hubble\")[](https://twitter.com/Livepeer\n\"Livepeer\")[](https://twitter.com/OpenLeverage\n\"OpenLeverage\")[](https://twitter.com/redactedcartel \"Redacted\nCartel\")[](https://twitter.com/biconomy\n\"Biconomy\")[](https://twitter.com/SturdyFinance\n\"Sturdy\")[](https://twitter.com/insuredao\n\"InsureDAO\")[](https://twitter.com/LensProtocol \"Aave\nLens\")[](https://twitter.com/IlluminateFi\n\"Illuminate\")[](https://twitter.com/AlchemixFi\n\"Alchemix\")[](https://twitter.com/veTokenFinance \"veToken\nFinance\")[](https://twitter.com/nounsbuilder \"Nouns\nBuilder\")[](https://twitter.com/tempusfinance \"Tempus\nFinance\")[](https://twitter.com/MochiDeFi\n\"Mochi\")[](https://twitter.com/unionprotocol \"Union\nFinance\")[](https://twitter.com/FairsideNetwork\n\"FairSide\")[](https://twitter.com/NestedFinance \"Nested\nFinance\")[](https://twitter.com/BadgerDAO\n\"BadgerDAO\")[](https://twitter.com/VaderProtocol \"Vader\nProtocol\")[](https://maple.finance/ \"Maple\nFinance\")[](https://twitter.com/overlayprotocol \"Overlay\nProtocol\")[](https://twitter.com/bootfinance \"Boot\nFinance\")[](https://twitter.com/yetifinance \"Yeti\nFinance\")[](https://twitter.com/Amun\n\"Amun\")[](https://twitter.com/sublimefinance\n\"Sublime\")[](https://twitter.com/SandclockOrg\n\"Sandclock\")[](https://twitter.com/NotionalFinance\n\"Notional\")[](https://elasticswap.org/\n\"ElasticSwap\")[](https://twitter.com/yield\n\"Yield\")[](https://twitter.com/traderjoe_xyz 
\"Trader\nJoe\")[](https://twitter.com/BehodlerAMM\n\"Behodler\")[](https://twitter.com/feiprotocol\n\"Tribe\")[](https://twitter.com/foundation\n\"Foundation\")[](https://twitter.com/sherlockdefi\n\"Sherlock\")[](https://twitter.com/Paladin_vote\n\"Paladin\")[](https://twitter.com/lifiprotocol\n\"LI.FI\")[](https://twitter.com/RollaFinance\n\"Rolla\")[](https://twitter.com/dualityfi \"Duality\nFocus\")[](https://twitter.com/jpegd_69\n\"JPEG'd\")[](https://twitter.com/ConcurFinance \"Concur\nFinance\")[](https://twitter.com/backdfund\n\"Backd\")[](https://twitter.com/VoltProtocol \"Volt\nProtocol\")[](https://twitter.com/phuture_finance \"Phuture\nFinance\")[](https://twitter.com/bunker_finance\n\"bunker.finance\")[](https://twitter.com/MIM_Spell \"Abracadabra\nMoney\")[](https://twitter.com/EnsoFinance \"Enso\nFinance\")[](https://twitter.com/mimo_labs \"Mimo\nDeFi\")[](https://twitter.com/joynxyz\n\"Joyn\")[](https://twitter.com/callyfinance\n\"Cally\")[](https://twitter.com/FactDAO/\n\"FactoryDAO\")[](https://twitter.com/forgottenrunes \"Forgotten\nRunes\")[](https://twitter.com/nibblnft\n\"Nibbl\")[](https://twitter.com/aurafinance \"Aura\nFinance\")[](https://twitter.com/velodromefi \"Velodrome\nFinance\")[](https://twitter.com/infinitydotxyz \"Infinity NFT\nMarketplace\")[](https://twitter.com/opensea\n\"OpenSea\")[](https://twitter.com/puttyfi\n\"Putty\")[](https://twitter.com/ookitrade\n\"Ooki\")[](https://twitter.com/juiceboxETH\n\"Juicebox\")[](https://twitter.com/ConnextNetwork\n\"Connext\")[](https://twitter.com/CantoPublic\n\"Canto\")[](https://twitter.com/Rigor_HQ \"Rigor\nProtocol\")[](https://twitter.com/fractional_art\n\"Fractional\")[](https://twitter.com/nounsdao \"Nouns\nDAO\")[](https://twitter.com/olympusdao \"Olympus\nDAO\")[](https://twitter.com/ensdomains \"ENS\")[](https://twitter.com/prtyDAO\n\"PartyDAO\")[](https://twitter.com/fraxfinance \"Frax\nFinance\")[](https://twitter.com/vtvlco 
\"VTVL\")[](https://twitter.com/golom_io\n\"Golom\")[](https://twitter.com/artgobblers \"Art\nGobblers\")[](https://twitter.com/Y2kFinance \"Y2k\nFinance\")[](https://twitter.com/chainlink\n\"Chainlink\")[](https://twitter.com/looksrare\n\"LooksRare\")[](https://twitter.com/ParaSpace_NFT\n\"ParaSpace\")[](https://twitter.com/EthosReserve \"Ethos Reserve\")\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/@lambda.json b/qa_bot/knowledge_base/c4/website/@lambda.json new file mode 100644 index 0000000..9de5e30 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@lambda.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@lambda", "md_content": "Skip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n# Lambda\n\nwarden\n\nOG Warden\n\nAvailable for Hire\n\nSecurity Researcher and Developer with over 8 years of experience that\ntransitioned from traditional finance to the blockchain space. Over 40\ncompleted audits (and penetration tests of off-chain components like wallets)\nin various settings with more than 600 findings and over 140 C4 high and\nmedium findings with 17 top 5 rankings in roughly 4 months. 
Academic\nbackground in Data Science / Mathematical Finance and High-Performance\nComputing with multiple\n[publications](https://scholar.google.com/citations?user=Qw45ZYwAAAAJ).\nStarted solving CTFs as a small child and has not stopped since then, member\nof the 2016 Swiss national CTF team at the European cyber security\nchampionship.\n\n## Highlights\n\n * [40+ completed audits and penetration tests](https://github.com/OpenCoreCH/smart-contract-audits)\n\nIn these audits, I found 600+ findings, which included 140+ high and medium\nseverity findings on Code4rena.\n\n## Lambda's Activity\n\nThe Activity Stream is currently in beta. [Share feedback here](/help).\n\n## Available for hire\n\n[Get a quote](https://code4rena.typeform.com/solo-\naudit#warden=Lambda&source=Lambda-profile)\n\n## About\n\nLambda is a Security Researcher and Developer with multiple years of\nexperience in IT security and traditional finance. His academic background is\nin Data Science, Mathematical Finance, and High-Performance Computing.\n\n * [Trust Security Profile](https://www.trust-security.xyz/team/lambda)\n\n## Helped Secure\n\n[](https://twitter.com/axelarcore \"Axelar\nNetwork\")[](https://twitter.com/nounsbuilder \"Nouns\nBuilder\")[](https://twitter.com/olympusdao \"Olympus\nDAO\")[](https://twitter.com/fraxfinance \"Frax\nFinance\")[](https://twitter.com/vtvlco \"VTVL\")[](https://twitter.com/golom_io\n\"Golom\")[](https://twitter.com/holographxyz\n\"Holograph\")[](https://twitter.com/blockswap_team \"Stakehouse\nProtocol\")[](https://twitter.com/AngleProtocol \"Angle\nProtocol\")[](https://twitter.com/NotionalFinance\n\"Notional\")[](https://twitter.com/nibblnft\n\"Nibbl\")[](https://twitter.com/infinitydotxyz \"Infinity 
NFT\nMarketplace\")[](https://twitter.com/puttyfi\n\"Putty\")[](https://twitter.com/IlluminateFi\n\"Illuminate\")[](https://twitter.com/juiceboxETH\n\"Juicebox\")[](https://twitter.com/ConnextNetwork\n\"Connext\")[](https://twitter.com/CantoPublic\n\"Canto\")[](https://twitter.com/mimo_labs \"Mimo\nDeFi\")[](https://twitter.com/Rigor_HQ \"Rigor\nProtocol\")[](https://twitter.com/shapeshift\n\"Yieldy\")[](https://twitter.com/fractional_art\n\"Fractional\")[](https://twitter.com/SwivelFinance\n\"Swivel\")[](https://twitter.com/fiatdao \"FIAT\nDAO\")[](https://twitter.com/foundation\n\"Foundation\")[](https://twitter.com/nounsdao \"Nouns\nDAO\")[](https://twitter.com/ensdomains \"ENS\")[](https://twitter.com/prtyDAO\n\"PartyDAO\")[](https://twitter.com/feiprotocol\n\"Tribe\")[](https://twitter.com/QuickswapDEX \"QuickSwap and\nStellaSwap\")[](https://twitter.com/artgobblers \"Art\nGobblers\")[](https://twitter.com/blur_io \"Blur\nExchange\")[](https://twitter.com/Y2kFinance \"Y2k\nFinance\")[](https://twitter.com/sizemarkets\n\"SIZE\")[](https://twitter.com/Paladin_vote\n\"Paladin\")[](https://twitter.com/traderjoe_xyz \"Trader\nJoe\")[](https://twitter.com/InverseFinance \"Inverse\nFinance\")[](https://twitter.com/chainlink\n\"Chainlink\")[](https://twitter.com/debtdao \"Debt\nDAO\")[](https://twitter.com/looksrare\n\"LooksRare\")[](https://twitter.com/redactedcartel \"Redacted\nCartel\")[](https://twitter.com/escherxyz\n\"Escher\")[](https://twitter.com/caviarAMM\n\"Caviar\")[](https://twitter.com/ParaSpace_NFT \"ParaSpace\")\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/@leastwood.json b/qa_bot/knowledge_base/c4/website/@leastwood.json new file mode 100644 index 0000000..a74c743 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@leastwood.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@leastwood", "md_content": "Skip Navigation\n\n[](/)\n\n[How it 
works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# leastwood\n\nwarden\n\nOG Warden\n\nAvailable for Hire\n\nI'm an independent security researcher focused primarily on EVM security. Most\nof my time is spent as a Lead Security Researcher at\n[Spearbit](https://spearbit.com/), but I like to dabble in other smart\ncontract stacks.\n\n## Highlights\n\n * Worked as a Sigma Prime security engineer\n\nPreviously worked with Sigma Prime as a security engineer for 2 years.\n\n * Interview with Andy Li\n\nIn early 2023, I had the opportunity to talk to Andy about web3 security and\nsmart contract auditing.\n\n * Paradigm Fellow\n\nCurrently a member of the Paradigm fellowship program.\n\n * Rank #3 all-time on Code4rena\n\nAt the time of writing, currently ranked at the #3 spot all-time on the\nCode4rena leaderboard.\n\n * Placed 11th in Paradigm CTF\n\nCompeted in the Paradigm CTF under SpearbitVanguard and ranked #11 overall.\n\n## leastwood's Activity\n\nThe Activity Stream is currently in beta. 
[Share feedback here](/help).\n\n## Available for hire\n\n[Get a quote](https://code4rena.typeform.com/solo-\naudit#warden=leastwood&source=leastwood-profile)\n\n * [Twitter](https://twitter.com/0xleastwood?lang=en)\n * [Telegram](https://telegram.me/leastwood)\n * [Website](https://www.leastwood.xyz/)\n\n## Helped Secure\n\n[](https://twitter.com/SushiSwap\n\"Sushi\")[](https://twitter.com/FairsideNetwork\n\"FairSide\")[](https://twitter.com/ConcurFinance \"Concur\nFinance\")[](https://twitter.com/EnsoFinance \"Enso\nFinance\")[](https://twitter.com/joynxyz\n\"Joyn\")[](https://twitter.com/realitycards \"Reality\nCards\")[](https://twitter.com/NotionalFinance\n\"Notional\")[](https://twitter.com/PoolTogether_\n\"PoolTogether\")[](https://twitter.com/wildcredit \"Wild\nCredit\")[](https://twitter.com/SwivelFinance\n\"Swivel\")[](https://twitter.com/MochiDeFi\n\"Mochi\")[](https://twitter.com/TallyCash\n\"Tally\")[](https://twitter.com/BadgerDAO\n\"BadgerDAO\")[](https://twitter.com/VaderProtocol \"Vader\nProtocol\")[](https://twitter.com/KuiperFinance\n\"Kuiper\")[](https://twitter.com/bootfinance \"Boot\nFinance\")[](https://twitter.com/perennial_fi\n\"Perennial\")[](https://twitter.com/sublimefinance\n\"Sublime\")[](https://twitter.com/xdefi_wallet\n\"XDEFI\")[](https://twitter.com/SandclockOrg\n\"Sandclock\")[](https://twitter.com/MaltProtocol \"Malt\nFinance\")[](https://twitter.com/insuredao\n\"InsureDAO\")[](https://twitter.com/yield \"Yield\")[](https://nftx.org/\n\"NFTX\")[](https://twitter.com/redactedcartel 
\"Redacted\nCartel\")[](https://twitter.com/jcam_official\n\"JPYC\")[](https://twitter.com/foundation\n\"Foundation\")[](https://twitter.com/HubbleExchange\n\"Hubble\")[](https://twitter.com/Paladin_vote\n\"Paladin\")[](https://twitter.com/prepo_io\n\"prePO\")[](https://twitter.com/bunker_finance\n\"bunker.finance\")[](https://twitter.com/SturdyFinance\n\"Sturdy\")[](https://twitter.com/SkaleNetwork\n\"SKALE\")[](https://twitter.com/FactDAO/\n\"FactoryDAO\")[](https://twitter.com/forgottenrunes \"Forgotten Runes\")\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/@unforgiven.json b/qa_bot/knowledge_base/c4/website/@unforgiven.json new file mode 100644 index 0000000..337ff15 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/@unforgiven.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@unforgiven", "md_content": "Skip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n# unforgiven\n\nwarden\n\nOG Warden\n\n## unforgiven's 
Activity\n\nThe Activity Stream is currently in beta. [Share feedback here](/help).\n\n## Helped Secure\n\n[](https://twitter.com/aurafinance \"Aura\nFinance\")[](https://twitter.com/BadgerDAO\n\"BadgerDAO\")[](https://twitter.com/ConnextNetwork\n\"Connext\")[](https://twitter.com/rubicondefi\n\"Rubicon\")[](https://twitter.com/infinitydotxyz \"Infinity NFT\nMarketplace\")[](https://twitter.com/redactedcartel \"Redacted\nCartel\")[](https://twitter.com/Y2kFinance \"Y2k\nFinance\")[](https://twitter.com/caviarAMM\n\"Caviar\")[](https://twitter.com/TigrisTrades \"Tigris\nTrade\")[](https://twitter.com/backed_xyz \"Backed\nProtocol\")[](https://twitter.com/GoGoPool_\n\"GoGoPool\")[](https://twitter.com/reserveprotocol\n\"Reserve\")[](https://twitter.com/zksync\n\"zkSync\")[](https://twitter.com/backdfund\n\"Backd\")[](https://twitter.com/PoolTogether_\n\"PoolTogether\")[](https://twitter.com/MIM_Spell \"Abracadabra\nMoney\")[](https://twitter.com/mimo_labs \"Mimo\nDeFi\")[](https://twitter.com/FactDAO/\n\"FactoryDAO\")[](https://twitter.com/forgottenrunes \"Forgotten\nRunes\")[](https://twitter.com/NotionalFinance\n\"Notional\")[](https://twitter.com/nibblnft\n\"Nibbl\")[](https://twitter.com/velodromefi \"Velodrome\nFinance\")[](https://twitter.com/puttyfi\n\"Putty\")[](https://twitter.com/IlluminateFi\n\"Illuminate\")[](https://twitter.com/veTokenFinance \"veToken\nFinance\")[](https://twitter.com/shapeshift\n\"Yieldy\")[](https://twitter.com/fractional_art\n\"Fractional\")[](https://twitter.com/nounsbuilder \"Nouns\nBuilder\")[](https://twitter.com/feiprotocol\n\"Tribe\")[](https://twitter.com/blockswap_team \"Stakehouse\nProtocol\")[](https://twitter.com/prepo_io\n\"prePO\")[](https://twitter.com/ParaSpace_NFT\n\"ParaSpace\")[](https://twitter.com/AstariaXYZ \"Astaria\")\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/@xiaoming90.json b/qa_bot/knowledge_base/c4/website/@xiaoming90.json new file mode 100644 index 0000000..dc21e2a --- 
/dev/null +++ b/qa_bot/knowledge_base/c4/website/@xiaoming90.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/@xiaoming90", "md_content": "Skip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# xiaoming90\n\nwarden\n\nOG Warden\n\nAvailable for Hire\n\n### About Me\n\nSecurity Researcher [@SpearbitDAO](https://twitter.com/SpearbitDAO) |\nCertified Warden [@code4rena](https://twitter.com/code4rena) | Senior Watson\n[@sherlockdefi](https://twitter.com/sherlockdefi) | Portfolio:\n\n\n### Private Audits\n\n * Kiln (Spearbit)\n * Liquid Collective (Spearbit)\n * Velodrome Finance (Spearbit)\n * Polygon zkEVM (Spearbit)\n * Connext Network (Spearbit)\n\n### Public Audit Contests\n\n * Connext Network (Code4rena) - Ranked 1st\n * Velodrome Finance (Code4rena) - Ranked 1st\n * Notional V3 (Sherlock) - Ranked 1st\n * Notional (Leveraged Vault) (Sherlock) - Ranked 1st\n * Notional (Leveraged Vault) Update #1 (Sherlock) - Ranked 1st\n * Notional (Leveraged Vault) Update #2 (Sherlock) - Ranked 1st\n * Redacted Cartel (Code4rena) - Ranked 1st\n * Bond Protocol (Sherlock) - Ranked 1st\n * Nibbl (Code4rena) - Ranked 1st\n * Axelar Network (Code4rena) - Ranked 2nd\n * veToken Finance (Code4rena) - Ranked 2nd\n * Bond Protocol Update #1 (Sherlock) - Ranked 3rd\n * Notional x Index Coop (Code4rena) - Ranked 3rd\n * Putty Finance (Code4rena) - Ranked 6th\n * Sentiment (Sherlock) - Ranked 6th\n * Rubicon (Code4rena) - Ranked 7th\n * ParaSpace (Code4rena) - Ranked 12th\n * Aura Finance (Code4rena) - Ranked 15th\n * Fractional (Code4rena) - Ranked 15th\n * Harpie (Sherlock) - Ranked 16th\n * Optimism (Sherlock) - Ranked 24th\n\n## xiaoming90's Activity\n\nThe Activity Stream is currently in beta. 
[Share feedback here](/help).\n\n## Available for hire\n\n[Get a quote](https://code4rena.typeform.com/solo-\naudit#warden=xiaoming90&source=xiaoming90-profile)\n\n * [Twitter](https://twitter.com/xiaoming9090)\n * [Github](https://github.com/xiaoming9090)\n\n## Helped Secure\n\n[](https://twitter.com/velodromefi \"Velodrome\nFinance\")[](https://twitter.com/puttyfi\n\"Putty\")[](https://twitter.com/redactedcartel \"Redacted\nCartel\")[](https://twitter.com/NotionalFinance\n\"Notional\")[](https://twitter.com/rubicondefi\n\"Rubicon\")[](https://twitter.com/nibblnft\n\"Nibbl\")[](https://twitter.com/aurafinance \"Aura\nFinance\")[](https://twitter.com/veTokenFinance \"veToken\nFinance\")[](https://twitter.com/ConnextNetwork\n\"Connext\")[](https://twitter.com/fractional_art\n\"Fractional\")[](https://twitter.com/axelarcore \"Axelar\nNetwork\")[](https://twitter.com/nounsdao \"Nouns\nDAO\")[](https://twitter.com/ParaSpace_NFT \"ParaSpace\")\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/bot.json b/qa_bot/knowledge_base/c4/website/bot.json new file mode 100644 index 0000000..8ac8e8c --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/bot.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/register/bot", "md_content": "Introducing Code4rena Profiles: a solo auditor's highlight reel.[Learn more\n->](/how-it-works/wardens)\n\nSkip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect 
MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# Gentlefrens, start your engines.\n\n## Coordinating AI and human efforts to provide the most comprehensive\nvulnerability reports for web3 projects.\n\nKeep your eye on our Announcements channel in\n[Discord](https://discord.gg/code4rena) to find out when the next one will be.\n\n## What's a Bot Race?\n\n **Bot Races** make AI and automated tools the first phase of the competitive\naudit.\n\n **Bot Crews** compete to see whose bot can create the highest quality and\nmost thorough audit report.\n\nUp to **10%** of each contest pool will be dedicated to Bot Race prizes.\n\n## How do Bot Races work?\n\n###\n\nStage 1\n\nQualifier Race\n\n * Bot Crews race to have their bots deliver the highest quality and most thorough report based on a repo provided at start time.\n * Qualifier Races are open for one hour.\n * Judges choose the Top 20 Bots for upcoming competitions.\n\n###\n\nStage 2\n\nBot Race\n\n * The first hour of each audit competition will be dedicated to a Bot Race.\n * Bot Race submissions will be assessed by Lookouts, and graded A/B/C similarly to other reports based on report quality, validity, and thoroughness.\n * The best Bot Race report will receive a 30% share bonus.\n * The top report will be shared in the competition's channel and will be considered the official source for known issue submissions.\n\n[Learn more about Bot Races](https://medium.com/code4rena/new-to-code4rena-\nbot-races-91b8f4ca0b18)\n\n## Is your Crew ready to race your bot?\n\n### Be sure to give it a winning name!\n\n# Bot Registration is closed\n\nThe Bot Race qualifier window is currently closed. 
\nKeep your eye on our Announcements channel in\n[Discord](https://discord.gg/code4rena) to find out when the next one will be.\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/contests.json b/qa_bot/knowledge_base/c4/website/contests.json new file mode 100644 index 0000000..f14a87a --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/contests.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/contests", "md_content": "Introducing Code4rena Profiles: a solo auditor's highlight reel.[Learn more\n->](/how-it-works/wardens)\n\nSkip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n## Loading...\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/help.json b/qa_bot/knowledge_base/c4/website/help.json new file mode 100644 index 0000000..d2b137b --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/help.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/help", "md_content": "Introducing Code4rena Profiles: a solo 
auditor's highlight reel.[Learn more\n->](/how-it-works/wardens)\n\nSkip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# How can we help?\n\nContact Information * (Optional)\n\nPlease enter your discord handle or your email address so we can follow up\nwith you\n\nDiscord Handle (Optional)\n\nEmail Address (Optional)\n\nWhat type of problem do you need help with? *\n\nSelect ...\n\nSubject * *\n\nDescription * *\n\nFor content longer than 2000 characters, please include a link to a gist in\nGitHub\n\n * Edit\n * Preview\n\nSubmit\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/how-it-works.json b/qa_bot/knowledge_base/c4/website/how-it-works.json new file mode 100644 index 0000000..41a0a3f --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/how-it-works.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/how-it-works", "md_content": "Introducing Code4rena Profiles: a solo auditor's highlight reel.[Learn more\n->](/how-it-works/wardens)\n\nSkip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# How it works\n\n## Roles\n\n### Warden\n\nWardens protect the web3 ecosystem from 
threats by auditing code.\n\n[Learn more about Wardens](/how-it-works/wardens)\n\n### Judge\n\nJudges decide the severity, validity, and quality of findings and rate the\nperformance of wardens.\n\n### Sponsor\n\nSponsors create prize pools to attract wardens to audit their projects.\n\n### Scout\n\nScouts focus on scoping and pre-audit intel.\n\n### Lookout\n\nLookouts review and organize submissions to Code4rena's audits.\n\n## Audit Types\n\n### Open\n\nThis is the standard competitive audit format on Code4rena where everyone is\ninvited to participate and all information is open and public.\n\n[Learn more about open competitions](https://medium.com/code-423n4/a-look-at-\ncode4rena-audits-open-1a8e74e558c8)\n\n### Private\n\nThis type of audit is restricted to Wardens who have met the conditions of the\nCode4rena [Certified Contributor](https://docs.code4rena.com/roles/certified-\ncontributors) program, which includes a Non-Disclosure Agreement. With\ncustomizations available for your privacy needs, Code4rena can offer you an\naudit that is as stealthy as you'd like.\n\n[Learn more about Private audits](https://medium.com/code4rena/a-look-at-\ncode4rena-audits-classified-3ee3cbe87617)\n\n### Invitational\n\nSponsors get the opportunity to work with top Wardens from the Code4rena\ncommunity in a format that best suits their needs. When an Invitational audit\nis announced, a limited number of the highest-ranking Wardens who RSVP within\na 48-hour window can participate in the audit.\n\n[Learn more about Invitational audits](https://medium.com/code4rena/a-look-at-\ncode4rena-audits-versus-6c55d57939ef)\n\n### Mitigation review\n\nOnce new code is ready for review after a Code4rena audit, the highest-\nperforming Wardens who found the initial vulnerabilities are invited to\nparticipate in a Mitigation review. 
Insertions, deletions and changes are\nconsidered in scope, with these Wardens doing another pass to confirm these\namendments and suitable and/or find any newly introduced vulnerabilities.\n\n[Learn more about Mitigation reviews](https://medium.com/code4rena/a-look-at-\ncode4rena-audits-mitigation-review-3e05f8b7acb7)\n\n### Bot Races\n\nBot Races make AI and automated tools part of the competitive audit. Bots are\nregistered and used to submit findings that can be automatically detected in a\nproject's code, with the winning report serving as a resource for all auditors\nand forming the basis for known issues for the audit.\n\n[Learn more about Bot Races](/register/bot)\n\n### Test Coverage\n\nCode4rena Test Coverage empowers projects to meet all of their priorities\nwithout compromise, by providing projects with surge capacity for their\nengineering teams pre-launch. Ship quality code that has been thoroughly\ntested by our community of security-minded experts, at an accelerated rate.\n\n[Learn more about Test Coverage](/test-coverage)\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/index.json b/qa_bot/knowledge_base/c4/website/index.json new file mode 100644 index 0000000..c9e949c --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/index.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/", "md_content": "Introducing Code4rena Profiles: a solo auditor's highlight reel.[Learn more\n->](/how-it-works/wardens)\n\nSkip Navigation\n\n[](/)\n\n[How it 
works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\nTab navigationFor ProjectsFor Wardens\n\n#\n\nSecure your\n\nsmart contracts.\n\nTop auditors compete to keep high severity bugs out of production. Start a\npublic or private audit within 48 hours.\n\n[Get your project audited](https://code4rena.typeform.com/i-want-an-audit)[See\npast reports](/reports)\n\n921\n\nunique \nhigh-severity vulns\n\n19,605\n\nunique findings\n\n5,000+\n\nregistered wardens\n\n257\n\naudits completed\n\n## Leading crypto projects choose Code4rena.\n\nThere's a reason why.\n\n * * * * * * * * * * * * * * * * * * \n\n## Don't just take our word for it.\n\n> With C4, it's like an order of magnitude improvement over other ways of\n> doing audits. The number of bugs, the degree to which I was convinced people\n> had looked into the code and found everything to be found, was much higher\n> due to the engagement, the variety of things reported, and the incentive\n> mechanism.\n>\n> **-- Reserve Protocol**\n\n> One of the things I enjoy most about competing on @code4rena is getting to\n> read lots of code and see new techniques in the wild.\n>\n> **--[Horsefacts](https://twitter.com/eth_call/status/1621940992411574284)**\n\n> We greatly enjoyed our @code4rena experience - here's why it is the best\n> value in auditing: quick spin-up time, flexible bounty awards, open nature\n> means more eyes on your code, by our count ~10x more vs traditional audit\n> shops, helpful community of wardens and judges\n>\n>\n> **--[@_benjaminhughes](https://twitter.com/_benjaminhughes/status/1554527455087558658?s=20&t=SxGE6sz2wxxChjUfrCW3dQ)**\n\n> After grinding for 14 months I finally reached my goal of being the first to\n> cross 1M$ on the @code4rena leaderboard. 
Thanks to everyone involved, this\n> has been very fun, lucrative, and I learned a lot by seeing other wardens'\n> vulnerabilities that I missed.\n>\n> **--[Cmichel](https://twitter.com/cmichelio/status/1521241247159140355)**\n\n> Just went through the latest @code4rena audit report of the @blur_io\n> exchange. This report is a great example of why audits are so important -\n> wardens were able to discover a high risk exploit that would've allowed\n> sellers to steal funds from buyers.\n>\n> **--[cygaar](https://twitter.com/0xCygaar/status/1601065454771924992)**\n\n> These @code4rena payouts are ridiculous. I'm tempted to drop everything and\n> tunnel vision that\n>\n> **--[@DeGatchi](https://twitter.com/DeGatchi/status/1624020967281557504)**\n\n> C4 wardens > any single audit shop IMO\n>\n> **--[@_benjaminhughes](https://twitter.com/_benjaminhughes)**\n\n> Thank you to all of the wardens who have been working on our code base. So\n> many great questions and so many many of them. I hope you'll all come try it\n> out when we're live and critique the user experience from top to bottom.\n> 10/10 would work with y'all again.\n>\n> **-- androolloyd**\n\n> Our experience with Code4rena has been great. After a very short lead time\n> and smooth onboarding, many experienced security researchers analyzed our\n> code in depth and provided valuable feedback. In addition to security-\n> related findings, we also received useful optimization suggestions.\n>\n> **-- Canto Identity Protocol**\n\n> I think @code4rena is a 10x improvement on smart contract audits. 
They're\n> timely, have transparent pricing and source a lot of great developer\n> attention.\n>\n>\n> **--[Zora](https://twitter.com/js_horne/status/1571623088890183687?lang=en)**\n\n# Are you ready?\n\n## The best time to secure your project is now.\n\n[Get Started](https://code4rena.typeform.com/i-want-an-audit)\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/leaderboard.json b/qa_bot/knowledge_base/c4/website/leaderboard.json new file mode 100644 index 0000000..818b25c --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/leaderboard.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/leaderboard", "md_content": "Introducing Code4rena Profiles: a solo auditor's highlight reel.[Learn more\n->](/how-it-works/wardens)\n\nSkip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# Leaderboard\n\nDate Range:202320222021Last 90 daysLast 365 daysAll time\n\nFilter By:AllAvailable for Hire\n\n * #\n * Competitor\n * USD \u25bc\n * Total\n * High\n * (Solo)\n * Med\n * (Solo)\n * Gas\n\n * Fetching results...\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * 
[code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/newsletter-signup.json b/qa_bot/knowledge_base/c4/website/newsletter-signup.json new file mode 100644 index 0000000..59f31fd --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/newsletter-signup.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/newsletter-signup", "md_content": "Introducing Code4rena Profiles: a solo auditor's highlight reel.[Learn more\n->](/how-it-works/wardens)\n\nSkip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# Subscribe to our newsletter\n\nEmail address*\n\nSubscribe\n\n## Thank you!\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/register.json b/qa_bot/knowledge_base/c4/website/register.json new file mode 100644 index 0000000..255839a --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/register.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/register", "md_content": "Introducing Code4rena Profiles: a solo auditor's highlight reel.[Learn more\n->](/how-it-works/wardens)\n\nSkip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect 
MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# Warden Registration\n\nTo register as a warden, please fill out this form and join us in\n[Discord](https://discord.gg/code4rena)\n\n * Register with Wallet\n * Register with Password\n\nCode4rena Username *\n\n **Choose wisely! Your username cannot be changed later.** \nUsed to report findings, as well as display your total award amount on the\nleaderboard. Supports alphanumeric characters, underscores, and hyphens. \n(Note: for consistency, please ensure your server nickname in our Discord\nmatches the username you provide here)\n\nDiscord Username *\n\nUsed in case we need to contact you about your submissions or winnings.\n\nEmail Address *\n\nUsed for sending confirmation emails for each of your submissions.\n\nPassword *\n\nMust be at least 18 characters long.\n\nConfirm Password *\n\nGitHub Username (Optional)\n\nUsed in case we need to give you access to certain repositories.\n\nAvatar (Optional)\n\nAn avatar displayed next to your name on the leaderboard.\n\nUse my wallet address for payment on Polygon\n\nBy submitting this form, you agree to abide by the C4 [code of\nconduct](https://discord.com/channels/810916927919620096/851883682470166558/851891396255940618)\nand [submission policy](https://docs.code4rena.com/roles/wardens/submission-\npolicy). 
If moderators determine you to be in violation of these guidelines,\nyou may be subject to consequences which could include a ban or forfeiture of\nawards.\n\nRegister with MetaMaskRegister with WalletConnect\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/reports.json b/qa_bot/knowledge_base/c4/website/reports.json new file mode 100644 index 0000000..bf0dfe1 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/reports.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/reports", "md_content": "Introducing Code4rena Profiles: a solo auditor's highlight reel.[Learn more\n->](/how-it-works/wardens)\n\nSkip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# Audit Reports\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/test-coverage.json b/qa_bot/knowledge_base/c4/website/test-coverage.json new file mode 100644 index 0000000..3db8c19 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/test-coverage.json 
@@ -0,0 +1 @@ +{"url": "https://code4rena.com/test-coverage", "md_content": "Introducing Code4rena Profiles: a solo auditor's highlight reel.[Learn more\n->](/how-it-works/wardens)\n\nSkip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# Test coverage, solved.\n\n## Saving your project time, money, and effort with the help of our web3 smart\ncontract experts.\n\n## Projects are already choosing Code4rena Test Coverage.\n\nHere's why: [ Code4rena Test Coverage](https://medium.com/code4rena/new-to-\ncode4rena-test-coverage-c548645404f9) empowers projects to meet all of their\npriorities without compromise, by providing projects with surge capacity for\ntheir engineering teams pre-launch.\n\n### Save time.\n\nWith the added capacity provided by Test Coverage Wardens, test coverage can\nbe completed in just a matter of days, saving your team precious time.\n\n### Save money.\n\nBy utilizing Code4rena Test Coverage, your project can save money that would\nhave been spent on hiring freelancers to complete test coverage.\n\n### Ensure quality.\n\nYou're empowered to ship code that has been thoroughly tested by security-\nminded experts, allowing your team to stay focused on the things that only\nthey can do.\n\n## How does Code4rena Test Coverage work?\n\n * ### Step 1\n\nThe project's engineer creates a GitHub project and outlines the functions\nneeding test coverage.\n\n * ### Step 2\n\nWardens review the GitHub project and make suggestions for additional tests\nneeded.\n\n * ### Step 3\n\nWardens claim test coverage tickets, work on them, then claim another when\nthey complete the first one.\n\n## Get comprehensive test coverage for your project today.\n\nWe're here to help you get the highest quality result in the shortest 
amount\nof time, without compromising on time, money, effort, or security. If you're\nlooking to deploy Code4rena Test Coverage as part of your end-to-end secure\nlaunch approach, reach out to our team.\n\n[Contact us](https://t.me/trebienxyz)\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/website/wardens.json b/qa_bot/knowledge_base/c4/website/wardens.json new file mode 100644 index 0000000..9d26f28 --- /dev/null +++ b/qa_bot/knowledge_base/c4/website/wardens.json @@ -0,0 +1 @@ +{"url": "https://code4rena.com/how-it-works/wardens", "md_content": "Introducing Code4rena Profiles: a solo auditor's highlight reel.[Learn more\n->](/how-it-works/wardens)\n\nSkip Navigation\n\n[](/)\n\n[How it works](/how-it-\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\n\nConnect\n\nMetaMaskWalletConnectLog inRegister\n\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\n\n# Wardens, welcome to the Arena.\n\n## The home for Web3 security experts.\n\n## A day in the life of a Code4rena Warden.\n\nTo have a secure ecosystem, there is a collective need for the services of\nsecurity and auditing experts. That's where you Wardens come in to save the\nday (and get rewarded for doing so!).\n\n * Step 1: Audit Code.\n * Step 2: Identify Vulnerablilities.\n * Step 3: Get Paid.\n * Step 4: Climb the [Leaderboard.](/leaderboard)\n\n## Ready to showcase your skills?\n\nCode4rena Profiles are a solo auditor's highlight reel.\n\n### Booking solo audits has never been easier. 
\ud83e\udd1d\n\nProjects can now book a solo audit with you directly from your profile. This\nfeature (combined with the Code4rena team's administrative assistance) makes\nbooking solo audits easy, allowing you to focus on what you're best at:\nauditing code.\n\n### Every stat, all in one place. \ud83d\udcc8\n\nWant to keep track of your individual performance and leaderboard rankings?\nThe metrics bar in your Code4rena Profile does just that.\n\n### All fired up. \ud83d\udd25\n\nWe know some of you are more visually minded than others, which is why there's\na toggle-able option to view your performance in a visual heat map.\n\n### It's like Twitter but for your auditing. \ud83d\udce3\n\nEvery audit you participate in, and every finding you make, all in a tidy,\nchronological activity stream.\n\n### Shine where shine is due. \u2728\n\nSometimes there's a finding that you're super proud of, and CodeArena\nencourages that. With the ability to highlight specific findings, you can\nbring your best work to the front of the stage.\n\n### For a more in-depth overview of Code4rena Profiles, \nvisit this [blog](https://medium.com/code4rena/code4rena-profiles-a-solo-\nauditors-highlight-reel-c4174946e388).\n\n## Your journey to the top begins now.\n\nBecome one of the best.\n\n * [cmichel\u2197](https://code4rena.com/@cmichel)\n * [leastwood\u2197](https://code4rena.com/@leastwood)\n * [hyh\u2197](https://code4rena.com/@hyh)\n * [gpersoon\u2197](https://code4rena.com/@gpersoon)\n * [gzeon\u2197](https://code4rena.com/@gzeon)\n * [hickuphh3\u2197](https://code4rena.com/@hickuphh3)\n * [cccz\u2197](https://code4rena.com/@cccz)\n * [Lambda\u2197](https://code4rena.com/@lambda)\n * [xiaoming90\u2197](https://code4rena.com/@xiaoming90)\n * [hansfriese\u2197](https://code4rena.com/@hansfriese)\n * [Trust\u2197](https://code4rena.com/@Trust)\n * [0xsomeone\u2197](https://code4rena.com/@0xsomeone)\n * [unforgiven\u2197](https://code4rena.com/@unforgiven)\n * 
[HollaDieWaldfee\u2197](https://code4rena.com/@HollaDieWaldfee)\n * [Jeiwan\u2197](https://code4rena.com/@Jeiwan)\n * [0x52\u2197](https://code4rena.com/@0x52)\n * [IllIllI\u2197](https://code4rena.com/@IllIllI)\n * [bin2chen\u2197](https://code4rena.com/@bin2chen)\n * [0xA5DF\u2197](https://code4rena.com/@0xA5DF)\n * [akshaysrivastav\u2197](https://code4rena.com/@akshaysrivastav)\n * [Join us](/register)\n\n## Start now.\n\nSecure the crypto ecosystem and get paid.\n\n[Become a Warden.](/register)\n\n * An open organization\n * [Twitter](https://twitter.com/code4rena)\n * [Discord](https://discord.gg/code4rena)\n * [GitHub](https://github.com/code-423n4/)\n * [Medium](https://medium.com/code4rena)\n * [Newsletter](/newsletter-signup)\n * [Media kit](https://github.com/code-423n4/media-kit)\n * [code4rena.eth](https://etherscan.io/address/0xC2BC2F890067C511215F9463A064221577A53E10)\n\n"} \ No newline at end of file diff --git a/qa_bot/qa_bot.ipynb b/qa_bot/qa_bot.ipynb new file mode 100644 index 0000000..36e4b62 --- /dev/null +++ b/qa_bot/qa_bot.ipynb @@ -0,0 +1,1698 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CodeArena (C4) Question Answer bot\n", + "\n", + "### Objective\n", + "- This notebook has the PoC work for a Question Answer bot using C4's knowledge bases.\n", + "- The objective of the PoC is to prototype an LLM implementation that can accurately answer questions to their expectation and at the very least perform better than their current bot from [Mava](https://www.mava.app/)\n", + "\n", + "### Observations from the usage of Mava\n", + "- The platform offers Discord support management with ticketing and AI help bot features\n", + "- For the AI help bot, the user is able to specify links to multiple knowledge sources that can be used for answering questions.\n", + "- Based on C4's testing of the Mava bot in the private channel, the following stats were observed:-\n", + " - Total questions asked: 29\n", + " - Total 
questions mis-answered based on emoji reactions: 13\n", + " - Accuracy - ~55%\n", + "\n", + "### Knowledge Bases\n", + "Based on conversations with their team, the following knowledge bases were identified to be relevant and are the same ones that Mava is using:-\n", + "- [Main Website](https://code4rena.com/)\n", + "- [Docs](https://docs.code4rena.com/) \n", + "\n", + "\n", + "### High-level Approach\n", + "- Crawl and scrape C4’s website and docs using Scrapy lib\n", + "- Convert the html content to markdown format so that the model can better understand the context\n", + "- Use LangChain lib to do the following:-\n", + " - Split the markdown header-separated sections into semantic chunks\n", + " - Embed and store the semantic chunks in an in-memory vector db\n", + " - Use the retrieval augmented functionality to answer the question" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: langchain[llms] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.0.278)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (3.8.5)\n", + "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.0.3)\n", + "Requirement already satisfied: PyYAML>=5.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (6.0.1)\n", + "Requirement already satisfied: langsmith<0.1.0,>=0.0.21 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.31)\n", + "Requirement already satisfied: numpy<2,>=1 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.25.2)\n", + "Requirement already satisfied: numexpr<3.0.0,>=2.8.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.8.5)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.0.20)\n", + "Requirement already satisfied: dataclasses-json<0.6.0,>=0.5.7 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.5.14)\n", + "Requirement already satisfied: pydantic<3,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.10.12)\n", + "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (8.2.3)\n", + "Requirement already satisfied: requests<3,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.31.0)\n", + "Requirement already satisfied: torch<3,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.0.1)\n", + "Requirement already satisfied: openai<1,>=0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.28.0)\n", + "Requirement already satisfied: clarifai>=9.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (9.7.6)\n", + "Requirement already satisfied: nlpcloud<2,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.1.44)\n", + "Requirement already satisfied: cohere<5,>=4 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.21)\n", + "Requirement already satisfied: manifest-ml<0.0.2,>=0.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.1)\n", + "Requirement already satisfied: openlm<0.0.6,>=0.0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.5)\n", + "Requirement already satisfied: huggingface_hub<1,>=0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.16.4)\n", + "Requirement already satisfied: transformers<5,>=4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.32.1)\n", + "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (3.2.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (6.0.4)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.9.2)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (23.1.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.4.0)\n", + "Requirement already satisfied: tritonclient==2.34.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (2.34.0)\n", + "Requirement already satisfied: clarifai-grpc>=9.7.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (9.7.6)\n", + "Requirement already satisfied: tqdm==4.64.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (4.64.1)\n", + "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (23.1)\n", + "Requirement already satisfied: rich==13.4.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (13.4.2)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (2.16.1)\n", + "Requirement already satisfied: python-rapidjson>=0.9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tritonclient==2.34.0->clarifai>=9.1.0->langchain[llms]) (1.10)\n", + "Requirement already satisfied: importlib_metadata<7.0,>=6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (6.8.0)\n", + "Requirement already satisfied: backoff<3.0,>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
cohere<5,>=4->langchain[llms]) (2.2.1)\n", + "Requirement already satisfied: fastavro==1.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (1.8.2)\n", + "Requirement already satisfied: urllib3<3,>=1.26 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (2.0.4)\n", + "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (0.9.0)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (3.20.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (4.7.1)\n", + "Requirement already satisfied: fsspec in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (2023.6.0)\n", + "Requirement already satisfied: filelock in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (3.12.3)\n", + "Requirement already satisfied: redis>=4.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (5.0.0)\n", + "Requirement already satisfied: sqlitedict>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (2.1.0)\n", + "Requirement already satisfied: dill>=0.3.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (0.3.7)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests<3,>=2->langchain[llms]) (3.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests<3,>=2->langchain[llms]) (2023.7.22)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain[llms]) (2.0.2)\n", + "Requirement already satisfied: networkx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (3.1)\n", + "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (3.1.2)\n", + "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (1.12)\n", + "Requirement already satisfied: regex!=2019.12.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (2023.8.8)\n", + "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (0.13.3)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (0.3.3)\n", + "Requirement already satisfied: grpcio>=1.44.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (1.57.0)\n", + "Requirement already 
satisfied: protobuf>=3.20.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (4.24.2)\n", + "Requirement already satisfied: googleapis-common-protos>=1.53.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (1.60.0)\n", + "Requirement already satisfied: zipp>=0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from importlib_metadata<7.0,>=6.0->cohere<5,>=4->langchain[llms]) (3.16.2)\n", + "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (1.0.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jinja2->torch<3,>=1->langchain[llms]) (2.1.3)\n", + "Requirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from sympy->torch<3,>=1->langchain[llms]) (1.3.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (0.1.2)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: Scrapy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages 
(2.10.1)\n", + "Requirement already satisfied: Twisted<23.8.0,>=18.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (22.10.0)\n", + "Requirement already satisfied: w3lib>=1.17.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (2.1.2)\n", + "Requirement already satisfied: service-identity>=18.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.1.0)\n", + "Requirement already satisfied: cssselect>=0.9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.2.0)\n", + "Requirement already satisfied: lxml>=4.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (4.9.3)\n", + "Requirement already satisfied: protego>=0.1.15 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (0.3.0)\n", + "Requirement already satisfied: PyDispatcher>=2.0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (2.0.7)\n", + "Requirement already satisfied: tldextract in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (3.4.4)\n", + "Requirement already satisfied: pyOpenSSL>=21.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.2.0)\n", + "Requirement already satisfied: queuelib>=1.4.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.6.2)\n", + "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.1)\n", + "Requirement already satisfied: zope.interface>=5.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (6.0)\n", + 
"Requirement already satisfied: setuptools in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (63.2.0)\n", + "Requirement already satisfied: itemadapter>=0.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (0.8.0)\n", + "Requirement already satisfied: cryptography>=36.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (41.0.3)\n", + "Requirement already satisfied: itemloaders>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.1.0)\n", + "Requirement already satisfied: parsel>=1.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.8.1)\n", + "Requirement already satisfied: cffi>=1.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cryptography>=36.0.0->Scrapy) (1.15.1)\n", + "Requirement already satisfied: jmespath>=0.9.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from itemloaders>=1.0.1->Scrapy) (1.0.1)\n", + "Requirement already satisfied: pyasn1-modules in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (0.3.0)\n", + "Requirement already satisfied: attrs>=19.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (23.1.0)\n", + "Requirement already satisfied: pyasn1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (0.5.0)\n", + "Requirement already satisfied: Automat>=0.8.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (22.10.0)\n", + "Requirement already satisfied: typing-extensions>=3.6.5 
in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (4.7.1)\n", + "Requirement already satisfied: incremental>=21.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (22.10.0)\n", + "Requirement already satisfied: constantly>=15.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (15.1.0)\n", + "Requirement already satisfied: hyperlink>=17.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (21.0.0)\n", + "Requirement already satisfied: idna in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (3.4)\n", + "Requirement already satisfied: requests>=2.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (2.31.0)\n", + "Requirement already satisfied: requests-file>=1.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (1.5.1)\n", + "Requirement already satisfied: filelock>=3.0.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (3.12.3)\n", + "Requirement already satisfied: six in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Automat>=0.8.0->Twisted<23.8.0,>=18.9.0->Scrapy) (1.16.0)\n", + "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=36.0.0->Scrapy) (2.21)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (3.2.0)\n", 
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (2.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (2023.7.22)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: html2text in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (2020.1.16)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: lxml in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (4.9.3)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: python-dotenv in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages 
(1.0.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: unstructured[all-docs] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.10.11)\n", + "Requirement already satisfied: requests in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.31.0)\n", + "Requirement already satisfied: python-magic in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.4.27)\n", + "Requirement already satisfied: emoji in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.8.0)\n", + "Requirement already satisfied: dataclasses-json in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.5.14)\n", + "Requirement already satisfied: chardet in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (5.2.0)\n", + "Requirement already satisfied: tabulate in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.9.0)\n", + "Requirement already satisfied: filetype in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.2.0)\n", + "Requirement already satisfied: nltk in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.8.1)\n", + "Requirement already satisfied: lxml in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (4.9.3)\n", + "Requirement already satisfied: beautifulsoup4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (4.12.2)\n", + "Requirement already satisfied: ebooklib in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.18)\n", + "Requirement already satisfied: pdf2image in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.16.3)\n", + "Requirement already satisfied: python-docx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.8.11)\n", + "Requirement already satisfied: markdown in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.4.4)\n", + "Requirement already satisfied: Pillow<10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (9.5.0)\n", + "Requirement already satisfied: xlrd in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.0.1)\n", + "Requirement already satisfied: pdfminer.six in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (20221105)\n", + "Requirement already satisfied: msg-parser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.2.0)\n", + "Requirement already satisfied: unstructured-inference in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.5.19)\n", + "Requirement already satisfied: python-pptx in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.6.22)\n", + "Requirement already satisfied: pandas in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.1.0)\n", + "Requirement already satisfied: openpyxl in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.1.2)\n", + "Requirement already satisfied: pypandoc in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.11)\n", + "Requirement already satisfied: soupsieve>1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from beautifulsoup4->unstructured[all-docs]) (2.4.1)\n", + "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json->unstructured[all-docs]) (0.9.0)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json->unstructured[all-docs]) (3.20.1)\n", + "Requirement already satisfied: six in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ebooklib->unstructured[all-docs]) (1.16.0)\n", + "Requirement already satisfied: olefile>=0.46 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from msg-parser->unstructured[all-docs]) (0.46)\n", + "Requirement already satisfied: click in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (8.1.7)\n", + "Requirement already satisfied: regex>=2021.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (2023.8.8)\n", + 
"Requirement already satisfied: joblib in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (1.3.2)\n", + "Requirement already satisfied: tqdm in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (4.64.1)\n", + "Requirement already satisfied: et-xmlfile in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from openpyxl->unstructured[all-docs]) (1.1.0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2023.3)\n", + "Requirement already satisfied: numpy>=1.22.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (1.25.2)\n", + "Requirement already satisfied: tzdata>=2022.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2023.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2.8.2)\n", + "Requirement already satisfied: cryptography>=36.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pdfminer.six->unstructured[all-docs]) (41.0.3)\n", + "Requirement already satisfied: charset-normalizer>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pdfminer.six->unstructured[all-docs]) (3.2.0)\n", + "Requirement already satisfied: XlsxWriter>=0.5.7 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from python-pptx->unstructured[all-docs]) (3.1.2)\n", + "Requirement already satisfied: certifi>=2017.4.17 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (2023.7.22)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (2.0.4)\n", + "Requirement already satisfied: opencv-python!=4.7.0.68 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (4.8.0.76)\n", + "Requirement already satisfied: transformers>=4.25.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (4.32.1)\n", + "Requirement already satisfied: python-multipart in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.0.6)\n", + "Requirement already satisfied: layoutparser[layoutmodels,tesseract] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.3.4)\n", + "Requirement already satisfied: huggingface-hub in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.16.4)\n", + "Requirement already satisfied: onnxruntime in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (1.15.1)\n", + "Requirement already satisfied: cffi>=1.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cryptography>=36.0.0->pdfminer.six->unstructured[all-docs]) (1.15.1)\n", + 
"Requirement already satisfied: packaging>=17.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->unstructured[all-docs]) (23.1)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (0.3.3)\n", + "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (0.13.3)\n", + "Requirement already satisfied: filelock in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (3.12.3)\n", + "Requirement already satisfied: pyyaml>=5.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (6.0.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface-hub->unstructured-inference->unstructured[all-docs]) (4.7.1)\n", + "Requirement already satisfied: fsspec in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface-hub->unstructured-inference->unstructured[all-docs]) (2023.6.0)\n", + "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json->unstructured[all-docs]) (1.0.0)\n", + "Requirement already satisfied: pdfplumber in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.10.2)\n", + "Requirement already satisfied: iopath in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.1.10)\n", + "Requirement already satisfied: scipy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.11.2)\n", + "Requirement already satisfied: pytesseract in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.3.10)\n", + "Requirement already satisfied: torchvision in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.15.2)\n", + "Requirement already satisfied: torch in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.0.1)\n", + "Requirement already satisfied: effdet in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.4.1)\n", + "Requirement already satisfied: protobuf in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (4.24.2)\n", + "Requirement already satisfied: coloredlogs in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (15.0.1)\n", + "Requirement already satisfied: flatbuffers in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (23.5.26)\n", + "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (1.12)\n", + "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six->unstructured[all-docs]) (2.21)\n", + "Requirement already satisfied: humanfriendly>=9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from coloredlogs->onnxruntime->unstructured-inference->unstructured[all-docs]) (10.0)\n", + "Requirement already satisfied: pycocotools>=2.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.0.7)\n", + "Requirement already satisfied: timm>=0.9.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.9.6)\n", + "Requirement already satisfied: omegaconf>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.3.0)\n", + "Requirement already satisfied: networkx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.1)\n", + "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.1.2)\n", + "Requirement already satisfied: portalocker in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from iopath->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.7.0)\n", + "Requirement already satisfied: pypdfium2>=4.18.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pdfplumber->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.19.0)\n", + "Requirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from sympy->onnxruntime->unstructured-inference->unstructured[all-docs]) (1.3.0)\n", + "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from omegaconf>=2.0->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.9.3)\n", + "Requirement already satisfied: matplotlib>=2.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.7.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jinja2->torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.1.3)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.1.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.4.5)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.42.1)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.11.0)\n", + "Requirement already satisfied: pyparsing<3.1,>=2.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.0.9)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: tiktoken in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.4.0)\n", + "Requirement already satisfied: regex>=2022.1.18 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tiktoken) (2023.8.8)\n", + "Requirement already satisfied: requests>=2.26.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tiktoken) (2.31.0)\n", + "Requirement 
already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (2.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (3.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (2023.7.22)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (3.2.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: faiss-cpu in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (1.7.4)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: GitPython in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (3.1.33)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from GitPython) (4.0.10)\n", + "Requirement already 
satisfied: smmap<6,>=3.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from gitdb<5,>=4.0.1->GitPython) (5.0.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: notebook in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (7.0.3)\n", + "Requirement already satisfied: notebook-shim<0.3,>=0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (0.2.3)\n", + "Requirement already satisfied: tornado>=6.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (6.3.3)\n", + "Requirement already satisfied: jupyterlab<5,>=4.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (4.0.5)\n", + "Requirement already satisfied: jupyter-server<3,>=2.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (2.7.3)\n", + "Requirement already satisfied: jupyterlab-server<3,>=2.22.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (2.24.0)\n", + "Requirement already satisfied: jupyter-server-terminals in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.4.4)\n", + "Requirement already satisfied: nbconvert>=6.4.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (7.8.0)\n", + "Requirement already satisfied: 
prometheus-client in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.17.1)\n", + "Requirement already satisfied: terminado>=0.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.17.1)\n", + "Requirement already satisfied: traitlets>=5.6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.9.0)\n", + "Requirement already satisfied: anyio>=3.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (4.0.0)\n", + "Requirement already satisfied: overrides in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (7.4.0)\n", + "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (3.1.2)\n", + "Requirement already satisfied: pyzmq>=24 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (25.1.1)\n", + "Requirement already satisfied: argon2-cffi in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (23.1.0)\n", + "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (23.1)\n", + "Requirement already satisfied: websocket-client in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (1.6.2)\n", + "Requirement already satisfied: send2trash>=1.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages 
(from jupyter-server<3,>=2.4.0->notebook) (1.8.2)\n", + "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.3.1)\n", + "Requirement already satisfied: jupyter-client>=7.4.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (8.3.1)\n", + "Requirement already satisfied: jupyter-events>=0.6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.7.0)\n", + "Requirement already satisfied: nbformat>=5.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.9.2)\n", + "Requirement already satisfied: ipykernel in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (6.25.1)\n", + "Requirement already satisfied: async-lru>=1.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.0.4)\n", + "Requirement already satisfied: tomli in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.0.1)\n", + "Requirement already satisfied: jupyter-lsp>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.2.0)\n", + "Requirement already satisfied: json5>=0.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (0.9.14)\n", + "Requirement already satisfied: babel>=2.10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (2.12.1)\n", + "Requirement already 
satisfied: requests>=2.28 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (2.31.0)\n", + "Requirement already satisfied: jsonschema>=4.17.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (4.19.0)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (1.1.3)\n", + "Requirement already satisfied: idna>=2.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (3.4)\n", + "Requirement already satisfied: sniffio>=1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (1.3.0)\n", + "Requirement already satisfied: typing-extensions>=4.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from async-lru>=1.0.0->jupyterlab<5,>=4.0.2->notebook) (4.7.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jinja2->jupyter-server<3,>=2.4.0->notebook) (2.1.3)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (2023.7.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (0.30.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages 
(from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (23.1.0)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (0.10.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-client>=7.4.4->jupyter-server<3,>=2.4.0->notebook) (2.8.2)\n", + "Requirement already satisfied: platformdirs>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-core!=5.0.*,>=4.12->jupyter-server<3,>=2.4.0->notebook) (3.10.0)\n", + "Requirement already satisfied: rfc3986-validator>=0.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (0.1.1)\n", + "Requirement already satisfied: pyyaml>=5.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (6.0.1)\n", + "Requirement already satisfied: python-json-logger>=2.0.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (2.0.7)\n", + "Requirement already satisfied: rfc3339-validator in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (0.1.4)\n", + "Requirement already satisfied: defusedxml in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.7.1)\n", + "Requirement already satisfied: bleach!=5.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (6.0.0)\n", + "Requirement already satisfied: pandocfilters>=1.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.5.0)\n", + "Requirement already satisfied: nbclient>=0.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.8.0)\n", + "Requirement already satisfied: tinycss2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.2.1)\n", + "Requirement already satisfied: pygments>=2.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (2.16.1)\n", + "Requirement already satisfied: beautifulsoup4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (4.12.2)\n", + "Requirement already satisfied: mistune<4,>=2.0.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (3.0.1)\n", + "Requirement already satisfied: jupyterlab-pygments in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.2.2)\n", + "Requirement already satisfied: fastjsonschema in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbformat>=5.3.0->jupyter-server<3,>=2.4.0->notebook) (2.18.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (2023.7.22)\n", + "Requirement already 
satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (2.0.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (3.2.0)\n", + "Requirement already satisfied: ptyprocess in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from terminado>=0.8.3->jupyter-server<3,>=2.4.0->notebook) (0.7.0)\n", + "Requirement already satisfied: argon2-cffi-bindings in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (21.2.0)\n", + "Requirement already satisfied: debugpy>=1.6.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.6.7.post1)\n", + "Requirement already satisfied: ipython>=7.23.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (8.14.0)\n", + "Requirement already satisfied: matplotlib-inline>=0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.6)\n", + "Requirement already satisfied: appnope in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.3)\n", + "Requirement already satisfied: nest-asyncio in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.5.7)\n", + "Requirement already satisfied: comm>=0.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.4)\n", + "Requirement already satisfied: psutil in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (5.9.5)\n", + "Requirement already satisfied: webencodings in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.5.1)\n", + "Requirement already satisfied: six>=1.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.16.0)\n", + "Requirement already satisfied: pexpect>4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (4.8.0)\n", + "Requirement already satisfied: decorator in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (5.1.1)\n", + "Requirement already satisfied: pickleshare in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.7.5)\n", + "Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (3.0.39)\n", + "Requirement already satisfied: backcall in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.0)\n", + "Requirement already satisfied: jedi>=0.16 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.19.0)\n", + "Requirement 
already satisfied: stack-data in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.6.2)\n", + "Requirement already satisfied: jsonpointer>1.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (2.4)\n", + "Requirement already satisfied: isoduration in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (20.11.0)\n", + "Requirement already satisfied: fqdn in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.5.1)\n", + "Requirement already satisfied: uri-template in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.3.0)\n", + "Requirement already satisfied: webcolors>=1.11 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.13)\n", + "Requirement already satisfied: cffi>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (1.15.1)\n", + "Requirement already satisfied: soupsieve>1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from beautifulsoup4->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (2.4.1)\n", + "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (2.21)\n", + "Requirement already satisfied: parso<0.9.0,>=0.8.3 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.8.3)\n", + "Requirement already satisfied: wcwidth in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.6)\n", + "Requirement already satisfied: arrow>=0.15.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from isoduration->jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.2.3)\n", + "Requirement already satisfied: asttokens>=2.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (2.2.1)\n", + "Requirement already satisfied: executing>=1.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.2.0)\n", + "Requirement already satisfied: pure-eval in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.2)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: chromadb in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.4.8)\n", + "Requirement already satisfied: pypika>=0.48.9 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.48.9)\n", + "Requirement already satisfied: bcrypt>=4.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (4.0.1)\n", + "Requirement already satisfied: posthog>=2.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (3.0.2)\n", + "Collecting tqdm>=4.65.0\n", + " Using cached tqdm-4.66.1-py3-none-any.whl (78 kB)\n", + "Requirement already satisfied: chroma-hnswlib==0.7.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.7.2)\n", + "Requirement already satisfied: numpy>=1.21.6 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.25.2)\n", + "Requirement already satisfied: overrides>=7.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (7.4.0)\n", + "Requirement already satisfied: requests>=2.28 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (2.31.0)\n", + "Requirement already satisfied: tokenizers>=0.13.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.13.3)\n", + "Requirement already satisfied: importlib-resources in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (6.0.1)\n", + "Requirement already satisfied: onnxruntime>=1.14.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.15.1)\n", + "Requirement already satisfied: pydantic<2.0,>=1.9 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.10.12)\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (4.7.1)\n", + "Requirement already satisfied: fastapi<0.100.0,>=0.95.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.99.1)\n", + "Requirement already satisfied: uvicorn[standard]>=0.18.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.23.2)\n", + "Requirement already satisfied: pulsar-client>=3.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (3.3.0)\n", + "Requirement already satisfied: starlette<0.28.0,>=0.27.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from fastapi<0.100.0,>=0.95.2->chromadb) (0.27.0)\n", + "Requirement already satisfied: protobuf in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (4.24.2)\n", + "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (23.1)\n", + "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (1.12)\n", + "Requirement already satisfied: flatbuffers in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (23.5.26)\n", + "Requirement already satisfied: coloredlogs in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (15.0.1)\n", + "Requirement already satisfied: six>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (1.16.0)\n", + "Requirement already satisfied: backoff>=1.10.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (2.2.1)\n", + "Requirement already satisfied: monotonic>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (1.6)\n", + "Requirement already satisfied: python-dateutil>2.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (2.8.2)\n", + "Requirement already satisfied: certifi in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pulsar-client>=3.1.0->chromadb) (2023.7.22)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.2.0)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (2.0.4)\n", + "Requirement already satisfied: h11>=0.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.14.0)\n", + "Requirement already satisfied: click>=7.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (8.1.7)\n", + "Requirement already satisfied: httptools>=0.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.6.0)\n", + "Requirement already satisfied: websockets>=10.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (11.0.3)\n", + 
"Requirement already satisfied: watchfiles>=0.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.20.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (6.0.1)\n", + "Requirement already satisfied: python-dotenv>=0.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (1.0.0)\n", + "Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.17.0)\n", + "Requirement already satisfied: anyio<5,>=3.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (4.0.0)\n", + "Requirement already satisfied: humanfriendly>=9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from coloredlogs->onnxruntime>=1.14.1->chromadb) (10.0)\n", + "Requirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from sympy->onnxruntime>=1.14.1->chromadb) (1.3.0)\n", + "Requirement already satisfied: sniffio>=1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (1.3.0)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (1.1.3)\n", + "Installing collected packages: tqdm\n", + " Attempting uninstall: tqdm\n", + " Found existing installation: tqdm 
4.64.1\n", + " Uninstalling tqdm-4.64.1:\n", + " Successfully uninstalled tqdm-4.64.1\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "clarifai 9.7.6 requires tqdm==4.64.1, but you have tqdm 4.66.1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed tqdm-4.66.1\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "# Install all the third-party packages\n", + "\n", + "!pip install 'langchain[llms]'\n", + "!pip install Scrapy\n", + "!pip install html2text\n", + "!pip install lxml\n", + "!pip install python-dotenv\n", + "!pip install \"unstructured[all-docs]\"\n", + "!pip install tiktoken\n", + "!pip install faiss-cpu \n", + "!pip install GitPython\n", + "!pip install notebook\n", + "!pip install chromadb" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# General setup - you can specify OPENAI_API_KEY in .env file\n", + "\n", + "import logging\n", + "from dotenv import load_dotenv\n", + "from IPython.display import display, Markdown, Latex\n", + "\n", + "logging.getLogger().setLevel(logging.INFO)\n", + "load_dotenv()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') or 
getpass.getpass('Enter your OpenAI API key: ')\n", + "\n", + "assert OPENAI_API_KEY, \"Please set OPENAI_API_KEY in your environment variables\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Crawling and Scraping using Scrapy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import scrapy\n", + "import html2text\n", + "import lxml.html\n", + "import json\n", + "from urllib.parse import urlparse\n", + "\n", + "C4_WEBSITE_STORAGE_DIR = \"knowledge_base/c4/website\"\n", + "C4_DOCS_STORAGE_DIR = \"knowledge_base/c4/docs\"\n", + "\n", + "class GenericSpider(scrapy.Spider):\n", + " name = 'generic'\n", + "\n", + " def __init__(self, domain='', storage_dir='.', *args, **kwargs):\n", + " super(GenericSpider, self).__init__(*args, **kwargs)\n", + " self.allowed_domains = [domain]\n", + " self.start_urls = [f'http://{domain}/']\n", + " self.storage_dir = storage_dir\n", + " \n", + " def parse(self, response):\n", + " # Remove unwanted elements using lxml\n", + " tree = lxml.html.fromstring(response.text)\n", + " \n", + " # Remove non-text related tags\n", + " for unwanted in tree.xpath('//script|//img|//video|//audio|//iframe|//object|//embed|//canvas|//svg|//link|//source|//track|//map|//area'):\n", + " unwanted.drop_tree()\n", + "\n", + " cleaned_html = lxml.html.tostring(tree).decode('utf-8')\n", + "\n", + " # Convert HTML to Markdown\n", + " converter = html2text.HTML2Text()\n", + " markdown_text = converter.handle(cleaned_html)\n", + "\n", + " # Save to a markdown file in the specified directory\n", + " if not os.path.exists(self.storage_dir):\n", + " os.makedirs(self.storage_dir)\n", + "\n", + " url = response.url\n", + " page_name = response.url.split(\"/\")[-1] if response.url.split(\"/\")[-1] else \"index\"\n", + "\n", + " filename = os.path.join(self.storage_dir, f'{page_name}.json')\n", + "\n", + " with open(filename, 'w') as f:\n", + " # Store the 
URL and markdown text in JSON format\n", + " json.dump({'url': url, 'md_content': markdown_text}, f)\n", + "\n", + " # Recursively follow relative links to other pages on the same domain\n", + " for href in response.css('a::attr(href)').getall():\n", + " url = response.urljoin(href)\n", + " if urlparse(url).netloc in self.allowed_domains:\n", + " yield scrapy.Request(url, self.parse)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "NOTE: Data has already been scraped and saved locally as JSON files in the 'knowledge_base/c4' directory. To re-run the scraping, uncomment the code in the cell below.\n", + "\n", + "If re-running the crawler raises a 'ReactorNotRestartable' error, restart the notebook kernel and run the cell again." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# from scrapy.crawler import CrawlerRunner\n", + "# from scrapy.utils.project import get_project_settings\n", + "# from twisted.internet import reactor\n", + "\n", + "# settings = get_project_settings()\n", + "\n", + "# runner = CrawlerRunner(settings)\n", + "# runner.crawl(GenericSpider, domain=\"code4rena.com\", storage_dir=C4_WEBSITE_STORAGE_DIR)\n", + "# runner.crawl(GenericSpider, domain=\"docs.code4rena.com\", storage_dir=C4_DOCS_STORAGE_DIR)\n", + "# d = runner.join()\n", + "# d.addBoth(lambda _: reactor.stop())\n", + "# reactor.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieval Augmented Generation using LangChain" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Load locally saved scraped data" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "from langchain.document_loaders import DirectoryLoader\n", + "from langchain.document_loaders import TextLoader\n", + "\n", + "def load_json_files(dir):\n", + " loader = DirectoryLoader(dir,
loader_cls=TextLoader)\n", + " documents = loader.load()\n", + " for d in documents:\n", + " page_content_dict = json.loads(d.page_content)\n", + " d.page_content = page_content_dict['md_content']\n", + " d.metadata['url'] = page_content_dict['url']\n", + " return documents\n", + "\n", + "c4_website_data_list = load_json_files(C4_WEBSITE_STORAGE_DIR)\n", + "c4_docs_data_list = load_json_files(C4_DOCS_STORAGE_DIR)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Split the markdown content into semantic chunks" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "89\n", + "97\n" + ] + } + ], + "source": [ + "from langchain.text_splitter import (\n", + " RecursiveCharacterTextSplitter,\n", + " Language,\n", + ")\n", + "\n", + "md_splitter = RecursiveCharacterTextSplitter.from_language(\n", + " language=Language.MARKDOWN, chunk_size=2000, chunk_overlap=200\n", + ")\n", + "\n", + "\n", + "website_chunks = md_splitter.split_documents(c4_website_data_list)\n", + "docs_chunks = md_splitter.split_documents(c4_docs_data_list)\n", + "\n", + "print(len(website_chunks))\n", + "print(len(docs_chunks))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Embed the semantic chunks and store in an in-memory vector db" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['d906998a-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069ab6-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069b10-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069b42-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069b7e-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069bb0-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069be2-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069c14-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069c46-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069c78-4b75-11ee-80c1-367dda1ae1c5',\n", + " 
'd9069caa-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069cd2-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069d04-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069d36-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069d68-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069d9a-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069dc2-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069df4-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069e26-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069e58-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069e8a-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069ebc-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069ee4-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069f16-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069f48-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069f7a-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069fac-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd9069fde-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a006-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a038-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a06a-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a09c-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a0ce-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a100-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a128-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a15a-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a18c-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a1be-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a1f0-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a218-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a24a-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a27c-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a2ae-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a2e0-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a312-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a344-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a36c-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a39e-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a3d0-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a402-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a42a-4b75-11ee-80c1-367dda1ae1c5',\n", + " 
'd906a45c-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a48e-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a4c0-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a4f2-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a51a-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a54c-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a57e-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a5b0-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a5e2-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a614-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a646-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a678-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a6aa-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a6dc-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a70e-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a740-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a772-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a7a4-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a7cc-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a7fe-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a830-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a862-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a88a-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a8bc-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a8ee-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a920-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a952-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a984-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a9b6-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906a9e8-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906aa1a-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906aa4c-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906aa74-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906aaa6-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906aad8-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906ab0a-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906ab32-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906ab64-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906ab96-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906abc8-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906abfa-4b75-11ee-80c1-367dda1ae1c5',\n", + " 
'd906ac2c-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906ac5e-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906ac90-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906acc2-4b75-11ee-80c1-367dda1ae1c5',\n", + " 'd906acf4-4b75-11ee-80c1-367dda1ae1c5']" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain.vectorstores import FAISS\n", + "from langchain.embeddings import OpenAIEmbeddings\n", + "from langchain.vectorstores import Chroma\n", + "\n", + "# NOTE: At times, OpenAI Embedding service can fail intermittently and return erroneous values such as [NaN], more info: https://github.com/langchain-ai/langchain/pull/7070\n", + "\n", + "embeddings = OpenAIEmbeddings()\n", + "vectorstore = Chroma(\"langchain_store\", embeddings)\n", + "\n", + "vectorstore.add_documents(website_chunks)\n", + "vectorstore.add_documents(docs_chunks)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Retrieval QA chain" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import RetrievalQA\n", + "from langchain.chat_models import ChatOpenAI\n", + "\n", + "qa = RetrievalQA.from_chain_type(llm=ChatOpenAI(model_name=\"gpt-4\", temperature=0), chain_type=\"stuff\", retriever=vectorstore.as_retriever(), return_source_documents=True)\n", + "\n", + "def ask(question):\n", + " result = qa({\"query\": question})\n", + " display(Markdown(f\"### Question\"))\n", + " display(Markdown(question))\n", + "\n", + " display(Markdown(f\"### Answer\"))\n", + " display(Markdown(result[\"result\"]))\n", + "\n", + " display(Markdown(f\"### Sources\"))\n", + " sources = [r.metadata['url'] for r in result[\"source_documents\"]]\n", + " print(\", \".join(sources))" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "# Questions that were answered incorrectly by the Mava bot as per emoji 
reaction in the test channel\n", + "MAVA_MISANSWERED_QUES = [\n", + " \"what's a scout?\",\n", + " \"Am I allowed to use AI in an audit?\",\n", + " \"Can I change my Code4rena username?\",\n", + " \"How do I book a solo audit?\",\n", + " \"Do I need to be certified to participate in an audit?\",\n", + " \"How do bot races work?\",\n", + " \"Can I change my Code4rena profile name?\",\n", + " \"What are scout awards?\",\n", + " \"What are analysis reports?\",\n", + " \"what is an analysis finding?\",\n", + " \"My name wasn't in the award announcements. When can I check on my results?\",\n", + " \"How long does the certification process take?\",\n", + " \"How can I access findings.csv?\"\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "what's a scout?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "A Scout in the context of Code4rena is a role that focuses on scoping and pre-audit intel. Currently, Scouts are hand-picked by the C4 team as it's a highly sensitive role." 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://docs.code4rena.com/roles/certified-contributors/lookouts, https://docs.code4rena.com/structure/frequently-asked-questions, https://code4rena.com/how-it-works, https://code4rena.com/how-it-works\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "Am I allowed to use AI in an audit?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "Yes, you are allowed to use AI in an audit, but there are some restrictions. Code4rena runs a Bot Race at the start of each audit where wardens compete to see whose AI-driven bot can create the highest quality and most thorough audit report. The winning report is shared with all C4 wardens and all findings in the winning Bot Report will be declared publicly known issues, and therefore ineligible for awards. \n", + "\n", + "However, using the output of AI tools like ChatGPT, GPT-3, or other automated tools for audit submissions is highly discouraged as it often leads to a high ratio of nonsense submissions. Wardens may use automated tools as a first pass, and build on these findings to identify High and Medium severity issues. But, submissions based on automated tools will have a higher burden of proof for demonstrating to sponsors a relevant exploit path in order to be considered satisfactory." 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://docs.code4rena.com/roles/wardens/submission-policy, https://docs.code4rena.com/awarding/fairness-and-validity, https://docs.code4rena.com/roles/wardens/submission-policy, https://docs.code4rena.com/awarding/incentive-model-and-awards\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "Can I change my Code4rena username?How do I book a solo audit?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "The text does not provide information on whether you can change your Code4rena username.\n", + "\n", + "To book a solo audit, a project team member needs to click the \"Get a quote\" button on a warden's profile and share scoping details with the Code4rena team. Code4rena staff will then consult with the warden and project team to firm up scoping, pricing, and dates." 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://docs.code4rena.com/structure/frequently-asked-questions, https://docs.code4rena.com/roles/wardens/solo-audits, https://code4rena.com/register, https://code4rena.com/register\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "Do I need to be certified to participate in an audit?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "Yes, to participate in an audit as a Certified Warden, you need to be certified. The certification process involves submitting the Certified Contributor Application form and providing necessary documents such as a local authority document that is less than 3 months old. Once your application is approved, you can participate in audits." 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://docs.code4rena.com/roles/certified-contributors, https://docs.code4rena.com/roles/certified-contributors, https://docs.code4rena.com/roles/wardens, https://docs.code4rena.com/roles/wardens/solo-audits\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "How do bot races work?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "I'm sorry, but the provided context does not contain information on how bot races work." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://docs.code4rena.com/awarding/fairness-and-validity, https://docs.code4rena.com/roles/judges, https://docs.code4rena.com/roles/judges/how-to-judge-a-contest, https://docs.code4rena.com/roles/sponsors\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "Can I change my Code4rena profile name?" 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "The provided context does not include information on whether you can change your Code4rena profile name." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://docs.code4rena.com/roles/wardens/warden-auth, https://code4rena.com/help, https://code4rena.com/help, https://code4rena.com/contests/2023-05-chainlink-cross-chain-services-ccip-and-arm-network\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "What are scout awards?" 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "I'm sorry, but the provided context does not contain any information about \"scout awards.\"" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://docs.code4rena.com/philosophy/security-is-about-people, https://docs.code4rena.com/roles/certified-contributors/lookouts, https://docs.code4rena.com/awarding/incentive-model-and-awards/awarding-process, https://docs.code4rena.com/awarding/judging-criteria\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "What are analysis reports?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "Analysis reports are written submissions that outline the Wardens' analysis of the codebase as a whole, any observations or advice they have about architecture, mechanism, or approach, broader concerns like systemic risks or centralization risks, and the approach taken in reviewing the code. They also include new insights and learnings from the audit. These reports provide wardens with an opportunity to contribute value through high level insights and advice that aren't necessarily covered by specific bugs. 
Analyses are judged A/B/C, with the top Analysis selected for inclusion in the audit report." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://docs.code4rena.com/awarding/judging-criteria, https://docs.code4rena.com/awarding/incentive-model-and-awards, https://docs.code4rena.com/awarding/fairness-and-validity, https://docs.code4rena.com/awarding/incentive-model-and-awards\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "what is an analysis finding?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "An analysis is a written submission that outlines the Wardens' analysis of the codebase as a whole and any observations or advice they have about architecture, mechanism, or approach. It also includes broader concerns like systemic risks or centralization risks, the approach taken in reviewing the code, and new insights and learnings from the audit. Analyses are judged A/B/C, with the top Analysis selected for inclusion in the audit report. They provide wardens with an opportunity to contribute value through high level insights and advice that aren't necessarily covered by specific bugs." 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://docs.code4rena.com/awarding/judging-criteria, https://docs.code4rena.com/awarding/incentive-model-and-awards, https://docs.code4rena.com/awarding/fairness-and-validity, https://docs.code4rena.com/structure/frequently-asked-questions\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "My name wasn't in the award announcements. When can I check on my results?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "Based on the audit timeline provided, the judging QA is completed and awards are announced between Day 25-34 after audit submissions close. If your name wasn't in the award announcements, you may want to wait until this period is over to check on your results. If you still don't see your award after this time, there might be other issues at play and you may need to contact the Code4rena Foundation for further assistance." 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://docs.code4rena.com/awarding/incentive-model-and-awards/awarding-process, https://docs.code4rena.com/roles/wardens/warden-auth, https://docs.code4rena.com/awarding/incentive-model-and-awards/qa-gas-report-faq, https://docs.code4rena.com/structure/our-process\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "How long does the certification process take?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "Once you submit the Certified Contributor Application form, Provenance typically emails you within one business day. If you have all the available documents, the process can usually be completed within a day. However, it will take longer if you need to assemble the necessary documents." 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://docs.code4rena.com/roles/certified-contributors, https://docs.code4rena.com/structure/our-process, https://docs.code4rena.com/roles/certified-contributors, https://docs.code4rena.com/roles/wardens\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "How can I access findings.csv?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "I'm sorry, but the provided context does not contain information on how to access findings.csv." 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://docs.code4rena.com/structure/frequently-asked-questions, https://docs.code4rena.com/roles/wardens/submission-policy, https://docs.code4rena.com/roles/wardens/submission-policy, https://docs.code4rena.com/roles/wardens/warden-auth\n" + ] + } + ], + "source": [ + "for q in MAVA_MISANSWERED_QUES:\n", + " ask(q)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "c4-chatbot", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.7" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 29fdc8762065e19ebeed5c8c35717b9cc49db9ec Mon Sep 17 00:00:00 2001 From: Sagar Shah Date: Mon, 4 Sep 2023 19:28:05 -0500 Subject: [PATCH 03/11] wip --- qa_bot/qa_bot.ipynb | 514 +------------------------------------------- 1 file changed, 4 insertions(+), 510 deletions(-) diff --git a/qa_bot/qa_bot.ipynb b/qa_bot/qa_bot.ipynb index 36e4b62..501750b 100644 --- a/qa_bot/qa_bot.ipynb +++ b/qa_bot/qa_bot.ipynb @@ -35,396 +35,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: langchain[llms] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages 
(0.0.278)\n", - "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (3.8.5)\n", - "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.0.3)\n", - "Requirement already satisfied: PyYAML>=5.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (6.0.1)\n", - "Requirement already satisfied: langsmith<0.1.0,>=0.0.21 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.31)\n", - "Requirement already satisfied: numpy<2,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.25.2)\n", - "Requirement already satisfied: numexpr<3.0.0,>=2.8.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.8.5)\n", - "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.0.20)\n", - "Requirement already satisfied: dataclasses-json<0.6.0,>=0.5.7 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.5.14)\n", - "Requirement already satisfied: pydantic<3,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.10.12)\n", - "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (8.2.3)\n", - "Requirement already satisfied: requests<3,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.31.0)\n", - "Requirement already 
satisfied: torch<3,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.0.1)\n", - "Requirement already satisfied: openai<1,>=0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.28.0)\n", - "Requirement already satisfied: clarifai>=9.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (9.7.6)\n", - "Requirement already satisfied: nlpcloud<2,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.1.44)\n", - "Requirement already satisfied: cohere<5,>=4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.21)\n", - "Requirement already satisfied: manifest-ml<0.0.2,>=0.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.1)\n", - "Requirement already satisfied: openlm<0.0.6,>=0.0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.5)\n", - "Requirement already satisfied: huggingface_hub<1,>=0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.16.4)\n", - "Requirement already satisfied: transformers<5,>=4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.32.1)\n", - "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (3.2.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (6.0.4)\n", - "Requirement already satisfied: 
yarl<2.0,>=1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.9.2)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.3.1)\n", - "Requirement already satisfied: attrs>=17.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (23.1.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.4.0)\n", - "Requirement already satisfied: tritonclient==2.34.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (2.34.0)\n", - "Requirement already satisfied: clarifai-grpc>=9.7.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (9.7.6)\n", - "Requirement already satisfied: tqdm==4.64.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (4.64.1)\n", - "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (23.1)\n", - "Requirement already satisfied: rich==13.4.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (13.4.2)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (2.16.1)\n", - "Requirement already satisfied: python-rapidjson>=0.9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tritonclient==2.34.0->clarifai>=9.1.0->langchain[llms]) (1.10)\n", - "Requirement already satisfied: importlib_metadata<7.0,>=6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (6.8.0)\n", - "Requirement already satisfied: backoff<3.0,>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (2.2.1)\n", - "Requirement already satisfied: fastavro==1.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (1.8.2)\n", - "Requirement already satisfied: urllib3<3,>=1.26 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (2.0.4)\n", - "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (0.9.0)\n", - "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (3.20.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (4.7.1)\n", - "Requirement already satisfied: fsspec in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (2023.6.0)\n", - "Requirement already satisfied: 
filelock in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (3.12.3)\n", - "Requirement already satisfied: redis>=4.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (5.0.0)\n", - "Requirement already satisfied: sqlitedict>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (2.1.0)\n", - "Requirement already satisfied: dill>=0.3.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (0.3.7)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests<3,>=2->langchain[llms]) (3.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests<3,>=2->langchain[llms]) (2023.7.22)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain[llms]) (2.0.2)\n", - "Requirement already satisfied: networkx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (3.1)\n", - "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (3.1.2)\n", - "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (1.12)\n", - "Requirement already satisfied: regex!=2019.12.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
transformers<5,>=4->langchain[llms]) (2023.8.8)\n", - "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (0.13.3)\n", - "Requirement already satisfied: safetensors>=0.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (0.3.3)\n", - "Requirement already satisfied: grpcio>=1.44.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (1.57.0)\n", - "Requirement already satisfied: protobuf>=3.20.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (4.24.2)\n", - "Requirement already satisfied: googleapis-common-protos>=1.53.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (1.60.0)\n", - "Requirement already satisfied: zipp>=0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from importlib_metadata<7.0,>=6.0->cohere<5,>=4->langchain[llms]) (3.16.2)\n", - "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (1.0.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jinja2->torch<3,>=1->langchain[llms]) (2.1.3)\n", - "Requirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from sympy->torch<3,>=1->langchain[llms]) (1.3.0)\n", - "Requirement already satisfied: mdurl~=0.1 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (0.1.2)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: Scrapy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (2.10.1)\n", - "Requirement already satisfied: Twisted<23.8.0,>=18.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (22.10.0)\n", - "Requirement already satisfied: w3lib>=1.17.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (2.1.2)\n", - "Requirement already satisfied: service-identity>=18.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.1.0)\n", - "Requirement already satisfied: cssselect>=0.9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.2.0)\n", - "Requirement already satisfied: lxml>=4.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (4.9.3)\n", - "Requirement already satisfied: protego>=0.1.15 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (0.3.0)\n", - "Requirement already satisfied: PyDispatcher>=2.0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (2.0.7)\n", - "Requirement already satisfied: tldextract in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (3.4.4)\n", - "Requirement already satisfied: pyOpenSSL>=21.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.2.0)\n", - "Requirement already satisfied: queuelib>=1.4.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.6.2)\n", - "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.1)\n", - "Requirement already satisfied: zope.interface>=5.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (6.0)\n", - "Requirement already satisfied: setuptools in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (63.2.0)\n", - "Requirement already satisfied: itemadapter>=0.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (0.8.0)\n", - "Requirement already satisfied: cryptography>=36.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (41.0.3)\n", - "Requirement already satisfied: itemloaders>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.1.0)\n", - "Requirement already satisfied: parsel>=1.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.8.1)\n", - "Requirement already satisfied: cffi>=1.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cryptography>=36.0.0->Scrapy) (1.15.1)\n", - "Requirement already satisfied: jmespath>=0.9.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from itemloaders>=1.0.1->Scrapy) (1.0.1)\n", - "Requirement already satisfied: 
pyasn1-modules in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (0.3.0)\n", - "Requirement already satisfied: attrs>=19.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (23.1.0)\n", - "Requirement already satisfied: pyasn1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (0.5.0)\n", - "Requirement already satisfied: Automat>=0.8.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (22.10.0)\n", - "Requirement already satisfied: typing-extensions>=3.6.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (4.7.1)\n", - "Requirement already satisfied: incremental>=21.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (22.10.0)\n", - "Requirement already satisfied: constantly>=15.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (15.1.0)\n", - "Requirement already satisfied: hyperlink>=17.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (21.0.0)\n", - "Requirement already satisfied: idna in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (3.4)\n", - "Requirement already satisfied: requests>=2.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (2.31.0)\n", - "Requirement already satisfied: requests-file>=1.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) 
(1.5.1)\n", - "Requirement already satisfied: filelock>=3.0.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (3.12.3)\n", - "Requirement already satisfied: six in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Automat>=0.8.0->Twisted<23.8.0,>=18.9.0->Scrapy) (1.16.0)\n", - "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=36.0.0->Scrapy) (2.21)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (3.2.0)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (2.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (2023.7.22)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: html2text in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (2020.1.16)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - 
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: lxml in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (4.9.3)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: python-dotenv in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (1.0.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: unstructured[all-docs] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.10.11)\n", - "Requirement already satisfied: requests in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.31.0)\n", - "Requirement already satisfied: python-magic in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.4.27)\n", - "Requirement already satisfied: emoji in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.8.0)\n", - "Requirement already satisfied: dataclasses-json in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.5.14)\n", - "Requirement already satisfied: chardet in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (5.2.0)\n", - "Requirement already satisfied: tabulate in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.9.0)\n", - "Requirement already satisfied: filetype in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.2.0)\n", - "Requirement already satisfied: nltk in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.8.1)\n", - "Requirement already satisfied: lxml in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (4.9.3)\n", - "Requirement already satisfied: beautifulsoup4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (4.12.2)\n", - "Requirement already satisfied: ebooklib in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.18)\n", - "Requirement already satisfied: pdf2image in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.16.3)\n", - "Requirement already satisfied: python-docx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.8.11)\n", - "Requirement already satisfied: markdown in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.4.4)\n", - "Requirement already satisfied: Pillow<10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
unstructured[all-docs]) (9.5.0)\n", - "Requirement already satisfied: xlrd in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.0.1)\n", - "Requirement already satisfied: pdfminer.six in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (20221105)\n", - "Requirement already satisfied: msg-parser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.2.0)\n", - "Requirement already satisfied: unstructured-inference in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.5.19)\n", - "Requirement already satisfied: python-pptx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.6.22)\n", - "Requirement already satisfied: pandas in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.1.0)\n", - "Requirement already satisfied: openpyxl in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.1.2)\n", - "Requirement already satisfied: pypandoc in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.11)\n", - "Requirement already satisfied: soupsieve>1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from beautifulsoup4->unstructured[all-docs]) (2.4.1)\n", - "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json->unstructured[all-docs]) (0.9.0)\n", - "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
dataclasses-json->unstructured[all-docs]) (3.20.1)\n", - "Requirement already satisfied: six in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ebooklib->unstructured[all-docs]) (1.16.0)\n", - "Requirement already satisfied: olefile>=0.46 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from msg-parser->unstructured[all-docs]) (0.46)\n", - "Requirement already satisfied: click in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (8.1.7)\n", - "Requirement already satisfied: regex>=2021.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (2023.8.8)\n", - "Requirement already satisfied: joblib in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (1.3.2)\n", - "Requirement already satisfied: tqdm in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (4.64.1)\n", - "Requirement already satisfied: et-xmlfile in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from openpyxl->unstructured[all-docs]) (1.1.0)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2023.3)\n", - "Requirement already satisfied: numpy>=1.22.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (1.25.2)\n", - "Requirement already satisfied: tzdata>=2022.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2023.3)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2.8.2)\n", - "Requirement already satisfied: cryptography>=36.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pdfminer.six->unstructured[all-docs]) (41.0.3)\n", - "Requirement already satisfied: charset-normalizer>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pdfminer.six->unstructured[all-docs]) (3.2.0)\n", - "Requirement already satisfied: XlsxWriter>=0.5.7 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from python-pptx->unstructured[all-docs]) (3.1.2)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (2023.7.22)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (2.0.4)\n", - "Requirement already satisfied: opencv-python!=4.7.0.68 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (4.8.0.76)\n", - "Requirement already satisfied: transformers>=4.25.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (4.32.1)\n", - "Requirement already satisfied: python-multipart in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.0.6)\n", - "Requirement already satisfied: 
layoutparser[layoutmodels,tesseract] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.3.4)\n", - "Requirement already satisfied: huggingface-hub in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.16.4)\n", - "Requirement already satisfied: onnxruntime in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (1.15.1)\n", - "Requirement already satisfied: cffi>=1.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cryptography>=36.0.0->pdfminer.six->unstructured[all-docs]) (1.15.1)\n", - "Requirement already satisfied: packaging>=17.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->unstructured[all-docs]) (23.1)\n", - "Requirement already satisfied: safetensors>=0.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (0.3.3)\n", - "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (0.13.3)\n", - "Requirement already satisfied: filelock in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (3.12.3)\n", - "Requirement already satisfied: pyyaml>=5.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (6.0.1)\n", - "Requirement already satisfied: 
typing-extensions>=3.7.4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface-hub->unstructured-inference->unstructured[all-docs]) (4.7.1)\n", - "Requirement already satisfied: fsspec in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface-hub->unstructured-inference->unstructured[all-docs]) (2023.6.0)\n", - "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json->unstructured[all-docs]) (1.0.0)\n", - "Requirement already satisfied: pdfplumber in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.10.2)\n", - "Requirement already satisfied: iopath in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.1.10)\n", - "Requirement already satisfied: scipy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.11.2)\n", - "Requirement already satisfied: pytesseract in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.3.10)\n", - "Requirement already satisfied: torchvision in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.15.2)\n", - "Requirement already satisfied: torch in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.0.1)\n", - "Requirement already satisfied: effdet in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.4.1)\n", - "Requirement already satisfied: protobuf in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (4.24.2)\n", - "Requirement already satisfied: coloredlogs in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (15.0.1)\n", - "Requirement already satisfied: flatbuffers in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (23.5.26)\n", - "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (1.12)\n", - "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six->unstructured[all-docs]) (2.21)\n", - "Requirement already satisfied: humanfriendly>=9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from coloredlogs->onnxruntime->unstructured-inference->unstructured[all-docs]) (10.0)\n", - "Requirement already satisfied: pycocotools>=2.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.0.7)\n", - "Requirement already satisfied: timm>=0.9.2 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.9.6)\n", - "Requirement already satisfied: omegaconf>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.3.0)\n", - "Requirement already satisfied: networkx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.1)\n", - "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.1.2)\n", - "Requirement already satisfied: portalocker in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from iopath->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.7.0)\n", - "Requirement already satisfied: pypdfium2>=4.18.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pdfplumber->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.19.0)\n", - "Requirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from sympy->onnxruntime->unstructured-inference->unstructured[all-docs]) (1.3.0)\n", - "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from omegaconf>=2.0->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.9.3)\n", - "Requirement already satisfied: matplotlib>=2.1.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.7.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jinja2->torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.1.3)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.1.0)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.4.5)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.42.1)\n", - "Requirement already satisfied: cycler>=0.10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.11.0)\n", - "Requirement already satisfied: pyparsing<3.1,>=2.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.0.9)\n", - "\n", - 
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: tiktoken in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.4.0)\n", - "Requirement already satisfied: regex>=2022.1.18 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tiktoken) (2023.8.8)\n", - "Requirement already satisfied: requests>=2.26.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tiktoken) (2.31.0)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (2.0.4)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (3.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (2023.7.22)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (3.2.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade 
pip\u001b[0m\n", - "Requirement already satisfied: faiss-cpu in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (1.7.4)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: GitPython in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (3.1.33)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from GitPython) (4.0.10)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from gitdb<5,>=4.0.1->GitPython) (5.0.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: notebook in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (7.0.3)\n", - "Requirement already satisfied: notebook-shim<0.3,>=0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (0.2.3)\n", - "Requirement already satisfied: tornado>=6.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (6.3.3)\n", - "Requirement already satisfied: jupyterlab<5,>=4.0.2 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (4.0.5)\n", - "Requirement already satisfied: jupyter-server<3,>=2.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (2.7.3)\n", - "Requirement already satisfied: jupyterlab-server<3,>=2.22.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (2.24.0)\n", - "Requirement already satisfied: jupyter-server-terminals in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.4.4)\n", - "Requirement already satisfied: nbconvert>=6.4.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (7.8.0)\n", - "Requirement already satisfied: prometheus-client in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.17.1)\n", - "Requirement already satisfied: terminado>=0.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.17.1)\n", - "Requirement already satisfied: traitlets>=5.6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.9.0)\n", - "Requirement already satisfied: anyio>=3.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (4.0.0)\n", - "Requirement already satisfied: overrides in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (7.4.0)\n", - "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) 
(3.1.2)\n", - "Requirement already satisfied: pyzmq>=24 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (25.1.1)\n", - "Requirement already satisfied: argon2-cffi in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (23.1.0)\n", - "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (23.1)\n", - "Requirement already satisfied: websocket-client in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (1.6.2)\n", - "Requirement already satisfied: send2trash>=1.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (1.8.2)\n", - "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.3.1)\n", - "Requirement already satisfied: jupyter-client>=7.4.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (8.3.1)\n", - "Requirement already satisfied: jupyter-events>=0.6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.7.0)\n", - "Requirement already satisfied: nbformat>=5.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.9.2)\n", - "Requirement already satisfied: ipykernel in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (6.25.1)\n", - "Requirement already satisfied: async-lru>=1.0.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.0.4)\n", - "Requirement already satisfied: tomli in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.0.1)\n", - "Requirement already satisfied: jupyter-lsp>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.2.0)\n", - "Requirement already satisfied: json5>=0.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (0.9.14)\n", - "Requirement already satisfied: babel>=2.10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (2.12.1)\n", - "Requirement already satisfied: requests>=2.28 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (2.31.0)\n", - "Requirement already satisfied: jsonschema>=4.17.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (4.19.0)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (1.1.3)\n", - "Requirement already satisfied: idna>=2.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (3.4)\n", - "Requirement already satisfied: sniffio>=1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (1.3.0)\n", - "Requirement already satisfied: typing-extensions>=4.0.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from async-lru>=1.0.0->jupyterlab<5,>=4.0.2->notebook) (4.7.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jinja2->jupyter-server<3,>=2.4.0->notebook) (2.1.3)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (2023.7.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (0.30.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (23.1.0)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (0.10.2)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-client>=7.4.4->jupyter-server<3,>=2.4.0->notebook) (2.8.2)\n", - "Requirement already satisfied: platformdirs>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-core!=5.0.*,>=4.12->jupyter-server<3,>=2.4.0->notebook) (3.10.0)\n", - "Requirement already satisfied: rfc3986-validator>=0.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (0.1.1)\n", - "Requirement already satisfied: pyyaml>=5.3 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (6.0.1)\n", - "Requirement already satisfied: python-json-logger>=2.0.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (2.0.7)\n", - "Requirement already satisfied: rfc3339-validator in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (0.1.4)\n", - "Requirement already satisfied: defusedxml in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.7.1)\n", - "Requirement already satisfied: bleach!=5.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (6.0.0)\n", - "Requirement already satisfied: pandocfilters>=1.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.5.0)\n", - "Requirement already satisfied: nbclient>=0.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.8.0)\n", - "Requirement already satisfied: tinycss2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.2.1)\n", - "Requirement already satisfied: pygments>=2.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (2.16.1)\n", - "Requirement already satisfied: beautifulsoup4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (4.12.2)\n", - "Requirement already satisfied: mistune<4,>=2.0.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (3.0.1)\n", - "Requirement already satisfied: jupyterlab-pygments in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.2.2)\n", - "Requirement already satisfied: fastjsonschema in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbformat>=5.3.0->jupyter-server<3,>=2.4.0->notebook) (2.18.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (2023.7.22)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (2.0.4)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (3.2.0)\n", - "Requirement already satisfied: ptyprocess in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from terminado>=0.8.3->jupyter-server<3,>=2.4.0->notebook) (0.7.0)\n", - "Requirement already satisfied: argon2-cffi-bindings in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (21.2.0)\n", - "Requirement already satisfied: debugpy>=1.6.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.6.7.post1)\n", - "Requirement already 
satisfied: ipython>=7.23.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (8.14.0)\n", - "Requirement already satisfied: matplotlib-inline>=0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.6)\n", - "Requirement already satisfied: appnope in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.3)\n", - "Requirement already satisfied: nest-asyncio in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.5.7)\n", - "Requirement already satisfied: comm>=0.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.4)\n", - "Requirement already satisfied: psutil in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (5.9.5)\n", - "Requirement already satisfied: webencodings in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.5.1)\n", - "Requirement already satisfied: six>=1.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.16.0)\n", - "Requirement already satisfied: pexpect>4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (4.8.0)\n", - "Requirement already satisfied: decorator in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (5.1.1)\n", - "Requirement already satisfied: pickleshare in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.7.5)\n", - "Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (3.0.39)\n", - "Requirement already satisfied: backcall in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.0)\n", - "Requirement already satisfied: jedi>=0.16 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.19.0)\n", - "Requirement already satisfied: stack-data in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.6.2)\n", - "Requirement already satisfied: jsonpointer>1.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (2.4)\n", - "Requirement already satisfied: isoduration in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (20.11.0)\n", - "Requirement already satisfied: fqdn in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.5.1)\n", - "Requirement already satisfied: uri-template in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) 
(1.3.0)\n", - "Requirement already satisfied: webcolors>=1.11 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.13)\n", - "Requirement already satisfied: cffi>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (1.15.1)\n", - "Requirement already satisfied: soupsieve>1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from beautifulsoup4->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (2.4.1)\n", - "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (2.21)\n", - "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.8.3)\n", - "Requirement already satisfied: wcwidth in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.6)\n", - "Requirement already satisfied: arrow>=0.15.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from isoduration->jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.2.3)\n", - "Requirement already satisfied: asttokens>=2.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (2.2.1)\n", - "Requirement already satisfied: executing>=1.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.2.0)\n", - "Requirement already satisfied: pure-eval in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.2)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: chromadb in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.4.8)\n", - "Requirement already satisfied: pypika>=0.48.9 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.48.9)\n", - "Requirement already satisfied: bcrypt>=4.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (4.0.1)\n", - "Requirement already satisfied: posthog>=2.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (3.0.2)\n", - "Collecting tqdm>=4.65.0\n", - " Using cached tqdm-4.66.1-py3-none-any.whl (78 kB)\n", - "Requirement already satisfied: chroma-hnswlib==0.7.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.7.2)\n", - "Requirement already satisfied: numpy>=1.21.6 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.25.2)\n", - "Requirement already satisfied: overrides>=7.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (7.4.0)\n", - "Requirement already satisfied: requests>=2.28 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (2.31.0)\n", - "Requirement already satisfied: tokenizers>=0.13.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.13.3)\n", - "Requirement already satisfied: importlib-resources in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (6.0.1)\n", - "Requirement already satisfied: onnxruntime>=1.14.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.15.1)\n", - "Requirement already satisfied: pydantic<2.0,>=1.9 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.10.12)\n", - "Requirement already satisfied: typing-extensions>=4.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (4.7.1)\n", - "Requirement already satisfied: fastapi<0.100.0,>=0.95.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.99.1)\n", - "Requirement already satisfied: uvicorn[standard]>=0.18.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.23.2)\n", - "Requirement already satisfied: pulsar-client>=3.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (3.3.0)\n", - "Requirement already satisfied: starlette<0.28.0,>=0.27.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from fastapi<0.100.0,>=0.95.2->chromadb) (0.27.0)\n", - "Requirement already satisfied: protobuf in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (4.24.2)\n", - "Requirement already satisfied: packaging in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (23.1)\n", - "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (1.12)\n", - "Requirement already satisfied: flatbuffers in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (23.5.26)\n", - "Requirement already satisfied: coloredlogs in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (15.0.1)\n", - "Requirement already satisfied: six>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (1.16.0)\n", - "Requirement already satisfied: backoff>=1.10.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (2.2.1)\n", - "Requirement already satisfied: monotonic>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (1.6)\n", - "Requirement already satisfied: python-dateutil>2.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (2.8.2)\n", - "Requirement already satisfied: certifi in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pulsar-client>=3.1.0->chromadb) (2023.7.22)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.4)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.2.0)\n", - "Requirement already satisfied: 
urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (2.0.4)\n", - "Requirement already satisfied: h11>=0.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.14.0)\n", - "Requirement already satisfied: click>=7.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (8.1.7)\n", - "Requirement already satisfied: httptools>=0.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.6.0)\n", - "Requirement already satisfied: websockets>=10.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (11.0.3)\n", - "Requirement already satisfied: watchfiles>=0.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.20.0)\n", - "Requirement already satisfied: pyyaml>=5.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (6.0.1)\n", - "Requirement already satisfied: python-dotenv>=0.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (1.0.0)\n", - "Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.17.0)\n", - "Requirement already satisfied: anyio<5,>=3.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (4.0.0)\n", - "Requirement already satisfied: humanfriendly>=9.1 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from coloredlogs->onnxruntime>=1.14.1->chromadb) (10.0)\n", - "Requirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from sympy->onnxruntime>=1.14.1->chromadb) (1.3.0)\n", - "Requirement already satisfied: sniffio>=1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (1.3.0)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (1.1.3)\n", - "Installing collected packages: tqdm\n", - " Attempting uninstall: tqdm\n", - " Found existing installation: tqdm 4.64.1\n", - " Uninstalling tqdm-4.64.1:\n", - " Successfully uninstalled tqdm-4.64.1\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "clarifai 9.7.6 requires tqdm==4.64.1, but you have tqdm 4.66.1 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed tqdm-4.66.1\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "# Install all the third-party packages\n", "\n", @@ -662,116 +275,9 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['d906998a-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069ab6-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069b10-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069b42-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069b7e-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069bb0-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069be2-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069c14-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069c46-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069c78-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069caa-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069cd2-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069d04-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069d36-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069d68-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069d9a-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069dc2-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069df4-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069e26-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069e58-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069e8a-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069ebc-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069ee4-4b75-11ee-80c1-367dda1ae1c5',\n", - " 
'd9069f16-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069f48-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069f7a-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069fac-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd9069fde-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a006-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a038-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a06a-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a09c-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a0ce-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a100-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a128-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a15a-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a18c-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a1be-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a1f0-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a218-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a24a-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a27c-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a2ae-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a2e0-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a312-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a344-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a36c-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a39e-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a3d0-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a402-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a42a-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a45c-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a48e-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a4c0-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a4f2-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a51a-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a54c-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a57e-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a5b0-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a5e2-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a614-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a646-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a678-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a6aa-4b75-11ee-80c1-367dda1ae1c5',\n", - " 
'd906a6dc-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a70e-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a740-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a772-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a7a4-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a7cc-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a7fe-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a830-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a862-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a88a-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a8bc-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a8ee-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a920-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a952-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a984-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a9b6-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906a9e8-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906aa1a-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906aa4c-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906aa74-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906aaa6-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906aad8-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906ab0a-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906ab32-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906ab64-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906ab96-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906abc8-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906abfa-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906ac2c-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906ac5e-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906ac90-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906acc2-4b75-11ee-80c1-367dda1ae1c5',\n", - " 'd906acf4-4b75-11ee-80c1-367dda1ae1c5']" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from langchain.vectorstores import FAISS\n", "from langchain.embeddings import OpenAIEmbeddings\n", @@ -1659,18 +1165,6 @@ "for q in MAVA_MISANSWERED_QUES:\n", " ask(q)" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": 
null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 937275919448c09ac9f597ef1fd051ab6dec4efb Mon Sep 17 00:00:00 2001 From: Sagar Shah Date: Mon, 4 Sep 2023 19:47:52 -0500 Subject: [PATCH 04/11] wip --- qa_bot/qa_bot.ipynb | 104 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 88 insertions(+), 16 deletions(-) diff --git a/qa_bot/qa_bot.ipynb b/qa_bot/qa_bot.ipynb index 501750b..ecdc3f6 100644 --- a/qa_bot/qa_bot.ipynb +++ b/qa_bot/qa_bot.ipynb @@ -325,7 +325,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -349,7 +349,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 44, "metadata": {}, "outputs": [ { @@ -391,7 +391,7 @@ { "data": { "text/markdown": [ - "A Scout in the context of Code4rena is a role that focuses on scoping and pre-audit intel. Currently, Scouts are hand-picked by the C4 team as it's a highly sensitive role." + "A Scout in Code4rena is a role that focuses on scoping and pre-audit intel. Currently, Scouts are hand-picked by the C4 team as it's a highly sensitive role." ], "text/plain": [ "" @@ -460,7 +460,7 @@ "text/markdown": [ "Yes, you are allowed to use AI in an audit, but there are some restrictions. Code4rena runs a Bot Race at the start of each audit where wardens compete to see whose AI-driven bot can create the highest quality and most thorough audit report. The winning report is shared with all C4 wardens and all findings in the winning Bot Report will be declared publicly known issues, and therefore ineligible for awards. \n", "\n", - "However, using the output of AI tools like ChatGPT, GPT-3, or other automated tools for audit submissions is highly discouraged as it often leads to a high ratio of nonsense submissions. Wardens may use automated tools as a first pass, and build on these findings to identify High and Medium severity issues. 
But, submissions based on automated tools will have a higher burden of proof for demonstrating to sponsors a relevant exploit path in order to be considered satisfactory." + "However, using the output of AI tools like ChatGPT, GPT-3, or other automated tools for audit submissions is highly discouraged as it often leads to a high ratio of nonsense submissions. If you use automated tools as a first pass, you will have a higher burden of proof for demonstrating a relevant High and Medium severity exploit path to be considered satisfactory." ], "text/plain": [ "" @@ -503,7 +503,7 @@ { "data": { "text/markdown": [ - "Can I change my Code4rena username?How do I book a solo audit?" + "Can I change my Code4rena username?" ], "text/plain": [ "" @@ -527,9 +527,7 @@ { "data": { "text/markdown": [ - "The text does not provide information on whether you can change your Code4rena username.\n", - "\n", - "To book a solo audit, a project team member needs to click the \"Get a quote\" button on a warden's profile and share scoping details with the Code4rena team. Code4rena staff will then consult with the warden and project team to firm up scoping, pricing, and dates." + "No, you cannot change your Code4rena username. Once chosen, your username cannot be changed later." ], "text/plain": [ "" @@ -554,7 +552,74 @@ "name": "stdout", "output_type": "stream", "text": [ - "https://docs.code4rena.com/structure/frequently-asked-questions, https://docs.code4rena.com/roles/wardens/solo-audits, https://code4rena.com/register, https://code4rena.com/register\n" + "https://docs.code4rena.com/roles/wardens/warden-auth, https://code4rena.com/register, https://code4rena.com/register, https://code4rena.com/help\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "How do I book a solo audit?" 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "The process of booking a solo audit begins with a project team member clicking the \"Get a quote\" button on a warden's profile, and sharing scoping details with the Code4rena team. Code4rena staff will then consult with the warden and project team to firm up scoping, pricing, and dates." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://docs.code4rena.com/roles/wardens/solo-audits, https://code4rena.com/register, https://code4rena.com/register, https://docs.code4rena.com/structure/frequently-asked-questions\n" ] }, { @@ -596,7 +661,7 @@ { "data": { "text/markdown": [ - "Yes, to participate in an audit as a Certified Warden, you need to be certified. The certification process involves submitting the Certified Contributor Application form and providing necessary documents such as a local authority document that is less than 3 months old. Once your application is approved, you can participate in audits." + "Yes, you need to be a Certified Contributor to participate in an audit according to the information provided. You can become a Certified Contributor by submitting the Certified Contributor Application form." ], "text/plain": [ "" @@ -663,7 +728,7 @@ { "data": { "text/markdown": [ - "I'm sorry, but the provided context does not contain information on how bot races work." + "I'm sorry, but the provided context does not contain any information about bot races." 
], "text/plain": [ "" @@ -688,7 +753,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "https://docs.code4rena.com/awarding/fairness-and-validity, https://docs.code4rena.com/roles/judges, https://docs.code4rena.com/roles/judges/how-to-judge-a-contest, https://docs.code4rena.com/roles/sponsors\n" + "https://docs.code4rena.com/roles/judges, https://docs.code4rena.com/roles/judges/how-to-judge-a-contest, https://docs.code4rena.com/awarding/fairness-and-validity, https://docs.code4rena.com/roles/sponsors\n" ] }, { @@ -797,7 +862,7 @@ { "data": { "text/markdown": [ - "I'm sorry, but the provided context does not contain any information about \"scout awards.\"" + "I'm sorry, but the provided context does not contain any information about \"scout awards\"." ], "text/plain": [ "" @@ -931,7 +996,7 @@ { "data": { "text/markdown": [ - "An analysis is a written submission that outlines the Wardens' analysis of the codebase as a whole and any observations or advice they have about architecture, mechanism, or approach. It also includes broader concerns like systemic risks or centralization risks, the approach taken in reviewing the code, and new insights and learnings from the audit. Analyses are judged A/B/C, with the top Analysis selected for inclusion in the audit report. They provide wardens with an opportunity to contribute value through high level insights and advice that aren't necessarily covered by specific bugs." + "An analysis finding is a written submission that outlines the Wardens' analysis of the codebase as a whole and any observations or advice they have about architecture, mechanism, or approach. It also includes broader concerns like systemic risks or centralization risks, the approach taken in reviewing the code, and new insights and learnings from the audit. Analyses are judged A/B/C, with the top Analysis selected for inclusion in the audit report." 
], "text/plain": [ "" @@ -998,7 +1063,7 @@ { "data": { "text/markdown": [ - "Based on the audit timeline provided, the judging QA is completed and awards are announced between Day 25-34 after audit submissions close. If your name wasn't in the award announcements, you may want to wait until this period is over to check on your results. If you still don't see your award after this time, there might be other issues at play and you may need to contact the Code4rena Foundation for further assistance." + "Based on the audit timeline provided by Code4rena Foundation, the judging QA is completed and awards are announced between Day 25-34 after audit submissions close. If your name wasn't in the award announcements, you may want to wait until this period is over. If you still don't see your award after this time, you may need to contact the foundation for further assistance." ], "text/plain": [ "" @@ -1132,7 +1197,7 @@ { "data": { "text/markdown": [ - "I'm sorry, but the provided context does not contain information on how to access findings.csv." + "I'm sorry, but the provided context does not contain information on how to access a file named \"findings.csv\"." 
], "text/plain": [ "" @@ -1165,6 +1230,13 @@ "for q in MAVA_MISANSWERED_QUES:\n", " ask(q)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From addf1db336fc9d9d74bb1756d76731093a87735c Mon Sep 17 00:00:00 2001 From: Sagar Shah Date: Mon, 11 Sep 2023 20:54:12 -0500 Subject: [PATCH 05/11] wip --- qa_bot/knowledge_base/c4/c4_test_qa.yaml | 104 ++ qa_bot/knowledge_base/c4/gh_docs | 1 + qa_bot/qa_bot.ipynb | 1852 +++++++++++----------- 3 files changed, 1066 insertions(+), 891 deletions(-) create mode 100644 qa_bot/knowledge_base/c4/c4_test_qa.yaml create mode 160000 qa_bot/knowledge_base/c4/gh_docs diff --git a/qa_bot/knowledge_base/c4/c4_test_qa.yaml b/qa_bot/knowledge_base/c4/c4_test_qa.yaml new file mode 100644 index 0000000..d5aec32 --- /dev/null +++ b/qa_bot/knowledge_base/c4/c4_test_qa.yaml @@ -0,0 +1,104 @@ +# list of question answer pairs in yaml format + +- question: Hi, how can I get backstage access? + answer: | + To get backstage access, you need to become a certified contributor. Here's how you can do that: + + Submit the Certified Contributor Application form and agree to the Certified Contributor Terms and Conditions. You can find the application form on the Code4rena website. + After you submit the form, the DAO's AML/KYC agent, Provenance, will contact you to certify your identity. + + Once you're a certified contributor, you may be eligible to participate in "Backstage" warden opportunities, which include post-contest triage and post-judging QA. Other opportunities for certified contributors include private or invite-only contests, the scout role, judging, providing mitigation review services, and offering solo audit and consulting services through C4. + + Please note that Code4rena is committed to privacy, so the certification process is done through a third party, Provenance, which is bound by confidentiality. 
+ source: https://docs.code4rena.com/roles/certified-contributors + +- question: how long does it take until findings are released? + answer: | + Based on the context provided, the findings from the C4 contest are typically made public when the audit report is published. This happens between Day 42 and Day 60 after the contest finishes. Please note that these are average times and the actual timeline may vary. The team is actively working to improve their planning and processes to speed up this timeline. + source: https://docs.code4rena.com/structure/our-process + +- question: When can I talk about findings? + answer: | + You can talk about your findings after the contest report has been published. Publicly disclosing any discovered bugs or vulnerabilities before this point is grounds for disqualification from all C4 events. So, it's crucial to wait until the official report is out before discussing your findings. + source: https://docs.code4rena.com/roles/wardens + +- question: How do I change my wallet address? + answer: | + To change your wallet address, follow these steps: + Log into your account. + Navigate to your account management page. + Update your payment information with your new wallet address. + + Please note that for each contest, awards are distributed to the payment address on file at the time of award calculation. So, make sure to update your wallet address before the award calculation begins. + source: https://docs.code4rena.com/structure/frequently-asked-questions + +- question: What are scouts? + answer: | + In the context of Code4rena, Scouts are individuals who focus on scoping and pre-audit intel. This means they are responsible for gathering and analyzing information before an audit takes place. This could involve understanding the project's scope, identifying potential areas of risk, and preparing the necessary information for the audit process. 
+ source: https://code4rena.com/how-it-works + +- question: How long does the contest process usually take? + answer: | + Based on the provided context, the contest process usually takes between 1 to 60 days. Here's a more detailed breakdown: + + Contest finishes: Day 1 + Sponsors review and give feedback on findings: Day 14 (on average) + Judges determine final severity: Day 21-30 + Judging QA complete; awards announced: Day 25-34 + Awards are distributed; Sponsors complete mitigation of any issues: Day 32-44 + Audit report is published; Contest issues are made public: Day 42-60 + + Please note that these are average times and the actual timeline may vary. The team is actively working to improve the planning and processes to speed up the process. + source: https://docs.code4rena.com/structure/our-process + +- question: how does certification work? + answer: | + The certification process at Code4rena works in the following way: + An eligible contributor submits the Certified Contributor Application form and agrees to the Certified Contributor Terms and Conditions. This is the initial step where the contributor expresses their interest in becoming certified. + + After the application is submitted, the DAO's AML/KYC agent, Provenance, contacts the contributor. Provenance is a third-party entity that is bound by confidentiality, ensuring the privacy of the contributor's information. + + Provenance then proceeds to certify the contributor's identity. This is done to establish trust and ensure that the contributor is who they claim to be. + + Once certified, contributors may be eligible to participate in various activities such as private contests, judging, providing mitigation review services, and more. The certification also opens up potential opportunities for certain contest bonus token awards and may be a factor in maxing out awards in the future. + source: https://docs.code4rena.com/roles/certified-contributors + +- question: Can I use bots to analyze code? 
+ answer: | + Yes, you can use bots to analyze code. In fact, there are competitions called "Bot Races" where AI and automated tools are used as the first phase of a competitive audit. In these races, "Bot Crews" compete to see whose bot can create the highest quality and most thorough audit report. The process works in two stages: + + Stage 1: Qualifier Race + Bot Crews race to have their bots deliver the highest quality and most thorough report based on a repo provided at start time. + Qualifier Races are open for one hour. + Judges choose the Top 20 Bots for upcoming competitions. + + Stage 2: Bot Race + The details of this stage are not provided in the context, but it likely involves the top bots from the Qualifier Race competing in a more intensive code analysis challenge. + + This shows that bots can indeed be used to analyze code, and can even be quite effective at it. + source: https://code4rena.com/register/bot + +- question: Can I use bots to analyze code? + answer: | + Yes, you can use bots to analyze code. In fact, there are competitions called "Bot Races" where AI and automated tools are used as the first phase of a competitive audit. In these races, "Bot Crews" compete to see whose bot can create the highest quality and most thorough audit report. The process works in two stages: + + Stage 1: Qualifier Race + Bot Crews race to have their bots deliver the highest quality and most thorough report based on a repo provided at start time. + Qualifier Races are open for one hour. + Judges choose the Top 20 Bots for upcoming competitions. + + Stage 2: Bot Race + The details of this stage are not provided in the context, but it likely involves the top bots from the Qualifier Race competing in a more intensive code analysis challenge. + + This shows that bots can indeed be used to analyze code, and can even be quite effective at it. + source: https://code4rena.com/register/bot + +- question: Can I use chatgpt? 
+ answer: | + Yes, you can use bots to analyze code. This is evident in the concept of "Bot Races" as described in the context. In these races, AI and automated tools, or "bots", are used in the first phase of a competitive audit. These bots compete to create the highest quality and most thorough audit report based on a provided repository. The bots that deliver the best reports are then chosen for upcoming competitions. This shows that bots can indeed be used to analyze code, and can even be used competitively to determine which bot can perform the most comprehensive analysis. + source: https://code4rena.com/register/bot + +- question: What is a lookout? + answer: | + In the context provided, a lookout is a role in the Code4rena ecosystem. Lookouts review and organize submissions to Code4rena’s audits. They play a crucial part in ensuring the quality and organization of the audit submissions. + source: https://code4rena.com/how-it-works diff --git a/qa_bot/knowledge_base/c4/gh_docs b/qa_bot/knowledge_base/c4/gh_docs new file mode 160000 index 0000000..da22307 --- /dev/null +++ b/qa_bot/knowledge_base/c4/gh_docs @@ -0,0 +1 @@ +Subproject commit da22307c0b2e3ff536f58cc28ce22180e8b5dcbd diff --git a/qa_bot/qa_bot.ipynb b/qa_bot/qa_bot.ipynb index ecdc3f6..5a9d511 100644 --- a/qa_bot/qa_bot.ipynb +++ b/qa_bot/qa_bot.ipynb @@ -35,9 +35,414 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 59, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: langchain[llms] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.0.278)\n", + "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (8.2.3)\n", + "Requirement already satisfied: numexpr<3.0.0,>=2.8.4 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.8.5)\n", + "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.0.3)\n", + "Requirement already satisfied: dataclasses-json<0.6.0,>=0.5.7 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.5.14)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (3.8.5)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.0.20)\n", + "Requirement already satisfied: langsmith<0.1.0,>=0.0.21 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.31)\n", + "Requirement already satisfied: numpy<2,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.25.2)\n", + "Requirement already satisfied: requests<3,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.31.0)\n", + "Requirement already satisfied: pydantic<3,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.10.12)\n", + "Requirement already satisfied: PyYAML>=5.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (6.0.1)\n", + "Requirement already satisfied: clarifai>=9.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (9.7.6)\n", + "Requirement already satisfied: huggingface_hub<1,>=0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.16.4)\n", + "Requirement already satisfied: openai<1,>=0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.28.0)\n", + "Requirement already satisfied: openlm<0.0.6,>=0.0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.5)\n", + "Requirement already satisfied: nlpcloud<2,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.1.44)\n", + "Requirement already satisfied: manifest-ml<0.0.2,>=0.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.1)\n", + "Requirement already satisfied: transformers<5,>=4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.32.1)\n", + "Requirement already satisfied: torch<3,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.0.1)\n", + "Requirement already satisfied: cohere<5,>=4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.21)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.3.1)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.4.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (6.0.4)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.9.2)\n", + "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (3.2.0)\n", + "Requirement already satisfied: attrs>=17.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (23.1.0)\n", + "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (23.1)\n", + "Requirement already satisfied: tritonclient==2.34.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (2.34.0)\n", + "Requirement already satisfied: clarifai-grpc>=9.7.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (9.7.6)\n", + "Requirement already satisfied: rich==13.4.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (13.4.2)\n", + "Collecting tqdm==4.64.1\n", + " Using cached tqdm-4.64.1-py2.py3-none-any.whl (78 kB)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (2.16.1)\n", + "Requirement already satisfied: python-rapidjson>=0.9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
tritonclient==2.34.0->clarifai>=9.1.0->langchain[llms]) (1.10)\n", + "Requirement already satisfied: importlib_metadata<7.0,>=6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (6.8.0)\n", + "Requirement already satisfied: fastavro==1.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (1.8.2)\n", + "Requirement already satisfied: urllib3<3,>=1.26 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (2.0.4)\n", + "Requirement already satisfied: backoff<3.0,>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (2.2.1)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (3.20.1)\n", + "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (0.9.0)\n", + "Requirement already satisfied: filelock in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (3.12.3)\n", + "Requirement already satisfied: fsspec in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (2023.6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (4.7.1)\n", + "Requirement already satisfied: sqlitedict>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (2.1.0)\n", + "Requirement already satisfied: redis>=4.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (5.0.0)\n", + "Requirement already satisfied: dill>=0.3.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (0.3.7)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests<3,>=2->langchain[llms]) (3.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests<3,>=2->langchain[llms]) (2023.7.22)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain[llms]) (2.0.2)\n", + "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (3.1.2)\n", + "Requirement already satisfied: networkx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (3.1)\n", + "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (1.12)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (0.3.3)\n", + "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (0.13.3)\n", + "Requirement already satisfied: 
regex!=2019.12.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (2023.8.8)\n", + "Requirement already satisfied: protobuf>=3.20.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (4.24.2)\n", + "Requirement already satisfied: grpcio>=1.44.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (1.57.0)\n", + "Requirement already satisfied: googleapis-common-protos>=1.53.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (1.60.0)\n", + "Requirement already satisfied: zipp>=0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from importlib_metadata<7.0,>=6.0->cohere<5,>=4->langchain[llms]) (3.16.2)\n", + "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (1.0.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jinja2->torch<3,>=1->langchain[llms]) (2.1.3)\n", + "Requirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from sympy->torch<3,>=1->langchain[llms]) (1.3.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (0.1.2)\n", + "Installing collected packages: tqdm\n", + " Attempting uninstall: tqdm\n", + " Found existing installation: tqdm 
4.66.1\n", + " Uninstalling tqdm-4.66.1:\n", + " Successfully uninstalled tqdm-4.66.1\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "chromadb 0.4.8 requires tqdm>=4.65.0, but you have tqdm 4.64.1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed tqdm-4.64.1\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: Scrapy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (2.10.1)\n", + "Requirement already satisfied: tldextract in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (3.4.4)\n", + "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.1)\n", + "Requirement already satisfied: PyDispatcher>=2.0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (2.0.7)\n", + "Requirement already satisfied: cssselect>=0.9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.2.0)\n", + "Requirement already satisfied: Twisted<23.8.0,>=18.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (22.10.0)\n", + "Requirement already satisfied: service-identity>=18.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.1.0)\n", + "Requirement already 
satisfied: pyOpenSSL>=21.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.2.0)\n", + "Requirement already satisfied: w3lib>=1.17.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (2.1.2)\n", + "Requirement already satisfied: protego>=0.1.15 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (0.3.0)\n", + "Requirement already satisfied: queuelib>=1.4.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.6.2)\n", + "Requirement already satisfied: cryptography>=36.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (41.0.3)\n", + "Requirement already satisfied: setuptools in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (63.2.0)\n", + "Requirement already satisfied: parsel>=1.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.8.1)\n", + "Requirement already satisfied: zope.interface>=5.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (6.0)\n", + "Requirement already satisfied: itemloaders>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.1.0)\n", + "Requirement already satisfied: itemadapter>=0.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (0.8.0)\n", + "Requirement already satisfied: lxml>=4.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (4.9.3)\n", + "Requirement already satisfied: cffi>=1.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cryptography>=36.0.0->Scrapy) (1.15.1)\n", + "Requirement already 
satisfied: jmespath>=0.9.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from itemloaders>=1.0.1->Scrapy) (1.0.1)\n", + "Requirement already satisfied: pyasn1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (0.5.0)\n", + "Requirement already satisfied: pyasn1-modules in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (0.3.0)\n", + "Requirement already satisfied: attrs>=19.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (23.1.0)\n", + "Requirement already satisfied: constantly>=15.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (15.1.0)\n", + "Requirement already satisfied: typing-extensions>=3.6.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (4.7.1)\n", + "Requirement already satisfied: Automat>=0.8.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (22.10.0)\n", + "Requirement already satisfied: hyperlink>=17.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (21.0.0)\n", + "Requirement already satisfied: incremental>=21.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (22.10.0)\n", + "Requirement already satisfied: filelock>=3.0.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (3.12.3)\n", + "Requirement already satisfied: idna in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
tldextract->Scrapy) (3.4)\n", + "Requirement already satisfied: requests>=2.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (2.31.0)\n", + "Requirement already satisfied: requests-file>=1.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (1.5.1)\n", + "Requirement already satisfied: six in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Automat>=0.8.0->Twisted<23.8.0,>=18.9.0->Scrapy) (1.16.0)\n", + "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=36.0.0->Scrapy) (2.21)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (2.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (2023.7.22)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (3.2.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: html2text in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (2020.1.16)\n", + "\n", + 
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: lxml in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (4.9.3)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: python-dotenv in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (1.0.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: unstructured[all-docs] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.10.11)\n", + "Requirement already satisfied: nltk in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.8.1)\n", + "Requirement already satisfied: filetype in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.2.0)\n", + "Requirement already satisfied: chardet in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (5.2.0)\n", + "Requirement already satisfied: beautifulsoup4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (4.12.2)\n", + "Requirement already satisfied: requests in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.31.0)\n", + "Requirement already satisfied: python-magic in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.4.27)\n", + "Requirement already satisfied: emoji in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.8.0)\n", + "Requirement already satisfied: tabulate in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.9.0)\n", + "Requirement already satisfied: dataclasses-json in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.5.14)\n", + "Requirement already satisfied: lxml in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (4.9.3)\n", + "Requirement already satisfied: openpyxl in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.1.2)\n", + "Requirement already satisfied: python-docx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.8.11)\n", + "Requirement already satisfied: ebooklib in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.18)\n", + "Requirement already satisfied: msg-parser in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.2.0)\n", + "Requirement already satisfied: unstructured-inference in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.5.19)\n", + "Requirement already satisfied: Pillow<10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (9.5.0)\n", + "Requirement already satisfied: pdfminer.six in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (20221105)\n", + "Requirement already satisfied: pdf2image in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.16.3)\n", + "Requirement already satisfied: markdown in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.4.4)\n", + "Requirement already satisfied: pypandoc in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.11)\n", + "Requirement already satisfied: python-pptx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.6.22)\n", + "Requirement already satisfied: pandas in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.1.0)\n", + "Requirement already satisfied: xlrd in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.0.1)\n", + "Requirement already satisfied: soupsieve>1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from beautifulsoup4->unstructured[all-docs]) (2.4.1)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json->unstructured[all-docs]) (3.20.1)\n", + "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json->unstructured[all-docs]) (0.9.0)\n", + "Requirement already satisfied: six in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ebooklib->unstructured[all-docs]) (1.16.0)\n", + "Requirement already satisfied: olefile>=0.46 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from msg-parser->unstructured[all-docs]) (0.46)\n", + "Requirement already satisfied: joblib in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (1.3.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (2023.8.8)\n", + "Requirement already satisfied: click in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (8.1.7)\n", + "Requirement already satisfied: tqdm in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (4.64.1)\n", + "Requirement already satisfied: et-xmlfile in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from openpyxl->unstructured[all-docs]) (1.1.0)\n", + "Requirement already satisfied: numpy>=1.22.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (1.25.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2023.3)\n", + 
"Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2.8.2)\n", + "Requirement already satisfied: tzdata>=2022.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2023.3)\n", + "Requirement already satisfied: charset-normalizer>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pdfminer.six->unstructured[all-docs]) (3.2.0)\n", + "Requirement already satisfied: cryptography>=36.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pdfminer.six->unstructured[all-docs]) (41.0.3)\n", + "Requirement already satisfied: XlsxWriter>=0.5.7 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from python-pptx->unstructured[all-docs]) (3.1.2)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (2.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (3.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (2023.7.22)\n", + "Requirement already satisfied: opencv-python!=4.7.0.68 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (4.8.0.76)\n", + "Requirement already satisfied: onnxruntime in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (1.15.1)\n", + "Requirement already 
satisfied: huggingface-hub in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.16.4)\n", + "Requirement already satisfied: python-multipart in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.0.6)\n", + "Requirement already satisfied: transformers>=4.25.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (4.32.1)\n", + "Requirement already satisfied: layoutparser[layoutmodels,tesseract] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.3.4)\n", + "Requirement already satisfied: cffi>=1.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cryptography>=36.0.0->pdfminer.six->unstructured[all-docs]) (1.15.1)\n", + "Requirement already satisfied: packaging>=17.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->unstructured[all-docs]) (23.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (6.0.1)\n", + "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (0.13.3)\n", + "Requirement already satisfied: filelock in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (3.12.3)\n", + "Requirement already satisfied: safetensors>=0.3.1 
in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (0.3.3)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface-hub->unstructured-inference->unstructured[all-docs]) (4.7.1)\n", + "Requirement already satisfied: fsspec in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface-hub->unstructured-inference->unstructured[all-docs]) (2023.6.0)\n", + "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json->unstructured[all-docs]) (1.0.0)\n", + "Requirement already satisfied: pdfplumber in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.10.2)\n", + "Requirement already satisfied: scipy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.11.2)\n", + "Requirement already satisfied: iopath in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.1.10)\n", + "Requirement already satisfied: pytesseract in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.3.10)\n", + "Requirement already satisfied: effdet in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.4.1)\n", + "Requirement already satisfied: torchvision in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.15.2)\n", + "Requirement already satisfied: torch in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.0.1)\n", + "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (1.12)\n", + "Requirement already satisfied: protobuf in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (4.24.2)\n", + "Requirement already satisfied: coloredlogs in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (15.0.1)\n", + "Requirement already satisfied: flatbuffers in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (23.5.26)\n", + "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six->unstructured[all-docs]) (2.21)\n", + "Requirement already satisfied: humanfriendly>=9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from coloredlogs->onnxruntime->unstructured-inference->unstructured[all-docs]) (10.0)\n", + "Requirement already satisfied: pycocotools>=2.0.2 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.0.7)\n", + "Requirement already satisfied: omegaconf>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.3.0)\n", + "Requirement already satisfied: timm>=0.9.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.9.6)\n", + "Requirement already satisfied: networkx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.1)\n", + "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.1.2)\n", + "Requirement already satisfied: portalocker in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from iopath->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.7.0)\n", + "Requirement already satisfied: pypdfium2>=4.18.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pdfplumber->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.19.0)\n", + "Requirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from sympy->onnxruntime->unstructured-inference->unstructured[all-docs]) (1.3.0)\n", + "Requirement already satisfied: antlr4-python3-runtime==4.9.* in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from omegaconf>=2.0->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.9.3)\n", + "Requirement already satisfied: matplotlib>=2.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.7.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jinja2->torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.1.3)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.11.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.4.5)\n", + "Requirement already satisfied: pyparsing<3.1,>=2.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.0.9)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.1.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.42.1)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: tiktoken in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.4.0)\n", + "Requirement already satisfied: regex>=2022.1.18 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tiktoken) (2023.8.8)\n", + "Requirement already satisfied: requests>=2.26.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tiktoken) (2.31.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (3.2.0)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (2.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (3.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (2023.7.22)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: 
\u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: faiss-cpu in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (1.7.4)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: GitPython in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (3.1.33)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from GitPython) (4.0.10)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from gitdb<5,>=4.0.1->GitPython) (5.0.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: notebook in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (7.0.3)\n", + "Requirement already satisfied: tornado>=6.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (6.3.3)\n", + 
"Requirement already satisfied: notebook-shim<0.3,>=0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (0.2.3)\n", + "Requirement already satisfied: jupyterlab-server<3,>=2.22.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (2.24.0)\n", + "Requirement already satisfied: jupyter-server<3,>=2.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (2.7.3)\n", + "Requirement already satisfied: jupyterlab<5,>=4.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (4.0.5)\n", + "Requirement already satisfied: jupyter-events>=0.6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.7.0)\n", + "Requirement already satisfied: anyio>=3.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (4.0.0)\n", + "Requirement already satisfied: pyzmq>=24 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (25.1.1)\n", + "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (3.1.2)\n", + "Requirement already satisfied: prometheus-client in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.17.1)\n", + "Requirement already satisfied: websocket-client in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (1.6.2)\n", + "Requirement already satisfied: traitlets>=5.6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
jupyter-server<3,>=2.4.0->notebook) (5.9.0)\n", + "Requirement already satisfied: jupyter-client>=7.4.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (8.3.1)\n", + "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (23.1)\n", + "Requirement already satisfied: terminado>=0.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.17.1)\n", + "Requirement already satisfied: send2trash>=1.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (1.8.2)\n", + "Requirement already satisfied: jupyter-server-terminals in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.4.4)\n", + "Requirement already satisfied: nbformat>=5.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.9.2)\n", + "Requirement already satisfied: overrides in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (7.4.0)\n", + "Requirement already satisfied: argon2-cffi in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (23.1.0)\n", + "Requirement already satisfied: nbconvert>=6.4.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (7.8.0)\n", + "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.3.1)\n", + "Requirement 
already satisfied: jupyter-lsp>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.2.0)\n", + "Requirement already satisfied: ipykernel in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (6.25.1)\n", + "Requirement already satisfied: async-lru>=1.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.0.4)\n", + "Requirement already satisfied: tomli in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.0.1)\n", + "Requirement already satisfied: json5>=0.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (0.9.14)\n", + "Requirement already satisfied: jsonschema>=4.17.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (4.19.0)\n", + "Requirement already satisfied: babel>=2.10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (2.12.1)\n", + "Requirement already satisfied: requests>=2.28 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (2.31.0)\n", + "Requirement already satisfied: sniffio>=1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (1.3.0)\n", + "Requirement already satisfied: idna>=2.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (3.4)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (1.1.3)\n", + "Requirement already satisfied: typing-extensions>=4.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from async-lru>=1.0.0->jupyterlab<5,>=4.0.2->notebook) (4.7.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jinja2->jupyter-server<3,>=2.4.0->notebook) (2.1.3)\n", + "Requirement already satisfied: referencing>=0.28.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (0.30.2)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (2023.7.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (0.10.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (23.1.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-client>=7.4.4->jupyter-server<3,>=2.4.0->notebook) (2.8.2)\n", + "Requirement already satisfied: platformdirs>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-core!=5.0.*,>=4.12->jupyter-server<3,>=2.4.0->notebook) (3.10.0)\n", + "Requirement already satisfied: rfc3339-validator in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (0.1.4)\n", + "Requirement already satisfied: python-json-logger>=2.0.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (2.0.7)\n", + "Requirement already satisfied: pyyaml>=5.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (6.0.1)\n", + "Requirement already satisfied: rfc3986-validator>=0.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (0.1.1)\n", + "Requirement already satisfied: mistune<4,>=2.0.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (3.0.1)\n", + "Requirement already satisfied: beautifulsoup4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (4.12.2)\n", + "Requirement already satisfied: bleach!=5.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (6.0.0)\n", + "Requirement already satisfied: jupyterlab-pygments in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.2.2)\n", + "Requirement already satisfied: defusedxml in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.7.1)\n", + "Requirement already satisfied: pandocfilters>=1.4.1 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.5.0)\n", + "Requirement already satisfied: tinycss2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.2.1)\n", + "Requirement already satisfied: nbclient>=0.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.8.0)\n", + "Requirement already satisfied: pygments>=2.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (2.16.1)\n", + "Requirement already satisfied: fastjsonschema in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbformat>=5.3.0->jupyter-server<3,>=2.4.0->notebook) (2.18.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (2023.7.22)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (2.0.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (3.2.0)\n", + "Requirement already satisfied: ptyprocess in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from terminado>=0.8.3->jupyter-server<3,>=2.4.0->notebook) (0.7.0)\n", + "Requirement already satisfied: argon2-cffi-bindings in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (21.2.0)\n", + "Requirement already satisfied: debugpy>=1.6.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.6.7.post1)\n", + "Requirement already satisfied: ipython>=7.23.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (8.14.0)\n", + "Requirement already satisfied: matplotlib-inline>=0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.6)\n", + "Requirement already satisfied: psutil in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (5.9.5)\n", + "Requirement already satisfied: nest-asyncio in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.5.7)\n", + "Requirement already satisfied: comm>=0.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.4)\n", + "Requirement already satisfied: appnope in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.3)\n", + "Requirement already satisfied: six>=1.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.16.0)\n", + "Requirement already satisfied: webencodings in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.5.1)\n", + "Requirement already satisfied: stack-data in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.6.2)\n", + "Requirement already satisfied: jedi>=0.16 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.19.0)\n", + "Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (3.0.39)\n", + "Requirement already satisfied: pexpect>4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (4.8.0)\n", + "Requirement already satisfied: pickleshare in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.7.5)\n", + "Requirement already satisfied: backcall in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.0)\n", + "Requirement already satisfied: decorator in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (5.1.1)\n", + "Requirement already satisfied: uri-template in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.3.0)\n", + "Requirement already satisfied: jsonpointer>1.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (2.4)\n", + "Requirement already satisfied: fqdn in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.5.1)\n", + "Requirement already satisfied: isoduration in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (20.11.0)\n", + "Requirement already satisfied: webcolors>=1.11 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.13)\n", + "Requirement already satisfied: cffi>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (1.15.1)\n", + "Requirement already satisfied: soupsieve>1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from beautifulsoup4->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (2.4.1)\n", + "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (2.21)\n", + "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.8.3)\n", + "Requirement already satisfied: wcwidth in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.6)\n", + "Requirement already satisfied: arrow>=0.15.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from isoduration->jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.2.3)\n", + "Requirement 
already satisfied: pure-eval in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.2)\n", + "Requirement already satisfied: asttokens>=2.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (2.2.1)\n", + "Requirement already satisfied: executing>=1.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.2.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: chromadb in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.4.8)\n", + "Requirement already satisfied: pydantic<2.0,>=1.9 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.10.12)\n", + "Requirement already satisfied: chroma-hnswlib==0.7.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.7.2)\n", + "Requirement already satisfied: uvicorn[standard]>=0.18.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.23.2)\n", + "Requirement already satisfied: numpy>=1.21.6 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.25.2)\n", + "Requirement already satisfied: tokenizers>=0.13.2 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.13.3)\n", + "Requirement already satisfied: pypika>=0.48.9 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.48.9)\n", + "Requirement already satisfied: overrides>=7.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (7.4.0)\n", + "Collecting tqdm>=4.65.0\n", + " Using cached tqdm-4.66.1-py3-none-any.whl (78 kB)\n", + "Requirement already satisfied: onnxruntime>=1.14.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.15.1)\n", + "Requirement already satisfied: posthog>=2.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (3.0.2)\n", + "Requirement already satisfied: importlib-resources in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (6.0.1)\n", + "Requirement already satisfied: requests>=2.28 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (2.31.0)\n", + "Requirement already satisfied: pulsar-client>=3.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (3.3.0)\n", + "Requirement already satisfied: fastapi<0.100.0,>=0.95.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.99.1)\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (4.7.1)\n", + "Requirement already satisfied: bcrypt>=4.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (4.0.1)\n", + "Requirement already satisfied: starlette<0.28.0,>=0.27.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from fastapi<0.100.0,>=0.95.2->chromadb) (0.27.0)\n", + "Requirement already satisfied: protobuf in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (4.24.2)\n", + "Requirement already satisfied: coloredlogs in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (15.0.1)\n", + "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (1.12)\n", + "Requirement already satisfied: flatbuffers in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (23.5.26)\n", + "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (23.1)\n", + "Requirement already satisfied: python-dateutil>2.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (2.8.2)\n", + "Requirement already satisfied: monotonic>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (1.6)\n", + "Requirement already satisfied: backoff>=1.10.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (2.2.1)\n", + "Requirement already satisfied: six>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (1.16.0)\n", + "Requirement already satisfied: certifi in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pulsar-client>=3.1.0->chromadb) (2023.7.22)\n", + "Requirement already satisfied: 
charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.2.0)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (2.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.4)\n", + "Requirement already satisfied: h11>=0.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.14.0)\n", + "Requirement already satisfied: click>=7.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (8.1.7)\n", + "Requirement already satisfied: httptools>=0.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.6.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (6.0.1)\n", + "Requirement already satisfied: python-dotenv>=0.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (1.0.0)\n", + "Requirement already satisfied: watchfiles>=0.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.20.0)\n", + "Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.17.0)\n", + "Requirement already satisfied: websockets>=10.4 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (11.0.3)\n", + "Requirement already satisfied: anyio<5,>=3.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (4.0.0)\n", + "Requirement already satisfied: humanfriendly>=9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from coloredlogs->onnxruntime>=1.14.1->chromadb) (10.0)\n", + "Requirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from sympy->onnxruntime>=1.14.1->chromadb) (1.3.0)\n", + "Requirement already satisfied: sniffio>=1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (1.3.0)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (1.1.3)\n", + "Installing collected packages: tqdm\n", + " Attempting uninstall: tqdm\n", + " Found existing installation: tqdm 4.64.1\n", + " Uninstalling tqdm-4.64.1:\n", + " Successfully uninstalled tqdm-4.64.1\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "clarifai 9.7.6 requires tqdm==4.64.1, but you have tqdm 4.66.1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed tqdm-4.66.1\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: pandas in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (2.1.0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (2023.3)\n", + "Requirement already satisfied: numpy>=1.22.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (1.25.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (2.8.2)\n", + "Requirement already satisfied: tzdata>=2022.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (2023.3)\n", + "Requirement already satisfied: six>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" 
+ ] + } + ], "source": [ "# Install all the third-party packages\n", "\n", @@ -51,12 +456,13 @@ "!pip install faiss-cpu \n", "!pip install GitPython\n", "!pip install notebook\n", - "!pip install chromadb" + "!pip install chromadb\n", + "!pip install pandas" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -65,7 +471,7 @@ "True" ] }, - "execution_count": 17, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -83,7 +489,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -95,6 +501,19 @@ "assert OPENAI_API_KEY, \"Please set OPENAI_API_KEY in your environment variables\"" ] }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "# Paths to the data\n", + "\n", + "C4_WEBSITE_STORAGE_DIR = \"knowledge_base/c4/website\"\n", + "C4_DOCS_STORAGE_DIR = \"knowledge_base/c4/docs\"\n", + "C4_GH_DOCS_STORAGE_DIR = \"knowledge_base/c4/gh_docs\"" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -104,7 +523,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -115,9 +534,6 @@ "import json\n", "from urllib.parse import urlparse\n", "\n", - "C4_WEBSITE_STORAGE_DIR = \"knowledge_base/c4/website\"\n", - "C4_DOCS_STORAGE_DIR = \"knowledge_base/c4/docs\"\n", - "\n", "class GenericSpider(scrapy.Spider):\n", " name = 'generic'\n", "\n", @@ -172,7 +588,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -190,6 +606,26 @@ "# reactor.run()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Get docs from Github Repo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# from git import Repo\n", + "\n", + "# repo = Repo.clone_from(\n", 
+ "# \"https://github.com/code-423n4/docs\", to_path=C4_GH_DOCS_STORAGE_DIR\n", + "# )" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -206,7 +642,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -227,6 +663,19 @@ "c4_docs_data_list = load_json_files(C4_DOCS_STORAGE_DIR)" ] }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders import DirectoryLoader\n", + "from langchain.document_loaders import TextLoader\n", + "\n", + "loader = DirectoryLoader(C4_GH_DOCS_STORAGE_DIR, loader_cls=TextLoader)\n", + "c4_gh_docs_data_list = loader.load()\n" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -236,7 +685,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -244,7 +693,8 @@ "output_type": "stream", "text": [ "89\n", - "97\n" + "97\n", + "72\n" ] } ], @@ -261,9 +711,11 @@ "\n", "website_chunks = md_splitter.split_documents(c4_website_data_list)\n", "docs_chunks = md_splitter.split_documents(c4_docs_data_list)\n", + "gh_docs_chunks = md_splitter.split_documents(c4_gh_docs_data_list)\n", "\n", "print(len(website_chunks))\n", - "print(len(docs_chunks))" + "print(len(docs_chunks))\n", + "print(len(gh_docs_chunks))" ] }, { @@ -275,9 +727,99 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised APIError: OpenAI API returned an empty embedding.\n", + "Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised APIError: OpenAI API returned an empty embedding.\n" + ] + }, + { + "data": { + "text/plain": [ + "['9f122ac2-50d5-11ee-8d9d-367dda1ae1c5',\n", + " 
'9f122bf8-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122c52-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122c8e-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122cca-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122cfc-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122d38-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122d6a-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122d9c-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122dce-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122e00-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122e32-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122e64-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122e96-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122ebe-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122ef0-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122f22-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122f54-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122f86-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122fb8-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f122fea-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f12301c-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f12304e-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123080-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1230b2-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1230da-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f12310c-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f12313e-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1231ac-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1231de-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123210-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123242-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123274-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1232a6-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1232d8-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123300-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123332-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123364-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123396-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1233be-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1233f0-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123422-50d5-11ee-8d9d-367dda1ae1c5',\n", + " 
'9f123454-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123486-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1234b8-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1234e0-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123512-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123544-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123576-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1235a8-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1235d0-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123602-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123634-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123666-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123698-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1236ca-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1236fc-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123724-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123756-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123788-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1237b0-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1237e2-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123814-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123846-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f12386e-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1238a0-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1238d2-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123904-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123936-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f123968-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f12399a-50d5-11ee-8d9d-367dda1ae1c5',\n", + " '9f1239c2-50d5-11ee-8d9d-367dda1ae1c5']" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from langchain.vectorstores import FAISS\n", "from langchain.embeddings import OpenAIEmbeddings\n", @@ -289,7 +831,8 @@ "vectorstore = Chroma(\"langchain_store\", embeddings)\n", "\n", "vectorstore.add_documents(website_chunks)\n", - "vectorstore.add_documents(docs_chunks)\n" + "# vectorstore.add_documents(docs_chunks)\n", + "vectorstore.add_documents(gh_docs_chunks)\n" ] }, { @@ -301,7 +844,7 @@ }, { "cell_type": "code", - "execution_count": 39, + 
"execution_count": 53, "metadata": {}, "outputs": [], "source": [ @@ -310,8 +853,14 @@ "\n", "qa = RetrievalQA.from_chain_type(llm=ChatOpenAI(model_name=\"gpt-4\", temperature=0), chain_type=\"stuff\", retriever=vectorstore.as_retriever(), return_source_documents=True)\n", "\n", - "def ask(question):\n", + "\n", + "def call_llm(question):\n", " result = qa({\"query\": question})\n", + " return result\n", + " \n", + "\n", + "def ask(question):\n", + " result = call_llm(question)\n", " display(Markdown(f\"### Question\"))\n", " display(Markdown(question))\n", "\n", @@ -319,13 +868,13 @@ " display(Markdown(result[\"result\"]))\n", "\n", " display(Markdown(f\"### Sources\"))\n", - " sources = [r.metadata['url'] for r in result[\"source_documents\"]]\n", + " sources = [r.metadata['url'] if 'url' in r.metadata else r.metadata['source'] for r in result[\"source_documents\"] ]\n", " print(\", \".join(sources))" ] }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -349,893 +898,414 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# for q in MAVA_MISANSWERED_QUES:\n", + "# ask(q)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### AutoEvaluator\n", + "Using LangChain's [AutoEvaluator technique](https://autoevaluator.langchain.com/) to evaluate the bot's performance on the dataset of C4 questions correctly answered by Mava as per team feedback\n" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "import yaml\n", + "\n", + "# load yaml file\n", + "with open('knowledge_base/c4/c4_test_qa.yaml') as file:\n", + " # The FullLoader parameter handles the conversion from YAML\n", + " # scalar values to Python the dictionary format\n", + " yaml_data = yaml.load(file, Loader=yaml.FullLoader)\n", + "\n", + "mava_questions = [d['question'] for d 
in yaml_data]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.prompts import PromptTemplate\n", + "\n", + "template = \"\"\" \n", + " You are a grader trying to determine if a set of retrieved documents will help a student answer a question. \\n\n", + "\n", + " Here is the question: \\n\n", + " {query}\n", + "\n", + " Here are the documents retrieved to answer question: \\n\n", + " {result}\n", + " \n", + " Here is the correct answer to the question: \\n \n", + " {answer}\n", + " \n", + " Criteria: \n", + " relevance: Do all of the documents contain information that will help the student arrive that the correct answer to the question?\"\n", + "\n", + " Your response should be as follows:\n", + "\n", + " GRADE: (Correct or Incorrect, depending if all of the documents retrieved meet the criterion)\n", + " (line break)\n", + " JUSTIFICATION: (Write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Use three sentences maximum. Keep the answer as concise as possible.)\n", + " \"\"\"\n", + "\n", + "GRADE_DOCS_PROMPT = PromptTemplate(input_variables=['result', 'answer', 'query'], template=template)\n", + "\n", + "template = \"\"\"You are a teacher grading a quiz. \n", + "You are given a question, the student's answer, and the true answer, and are asked to score the student answer as either Correct or Incorrect.\n", + "\n", + "Example Format:\n", + "QUESTION: question here\n", + "STUDENT ANSWER: student's answer here\n", + "TRUE ANSWER: true answer here\n", + "GRADE: Correct or Incorrect here\n", + "\n", + "Grade the student answers based ONLY on their factual accuracy. Ignore differences in punctuation and phrasing between the student answer and true answer. It is OK if the student answer contains more information than the true answer, as long as it does not contain any conflicting statements. 
If the student answers that there is no specific information provided in the context, then the answer is Incorrect. Begin! \n", + "\n", + "QUESTION: {query}\n", + "STUDENT ANSWER: {result}\n", + "TRUE ANSWER: {answer}\n", + "GRADE:\n", + "\n", + "Your response should be as follows:\n", + "\n", + "GRADE: (Correct or Incorrect)\n", + "(line break)\n", + "JUSTIFICATION: (Without mentioning the student/teacher framing of this prompt, explain why the STUDENT ANSWER is Correct or Incorrect. Use one or two sentences maximum. Keep the answer as concise as possible.)\n", + "\"\"\"\n", + "\n", + "GRADE_ANSWER_PROMPT = PromptTemplate(input_variables=[\"query\", \"result\", \"answer\"], template=template)" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.evaluation.qa import QAEvalChain\n", + "\n", + "def grade_model_answer(predicted_dataset, predictions):\n", + "\n", + " # Create an evaluation chain\n", + " eval_chain = QAEvalChain.from_llm(\n", + " llm=ChatOpenAI(model_name=\"gpt-4\", temperature=0),\n", + " prompt=GRADE_ANSWER_PROMPT\n", + " )\n", + "\n", + " # Evaluate the predictions and ground truth using the evaluation chain\n", + " graded_outputs = eval_chain.evaluate(\n", + " predicted_dataset,\n", + " predictions,\n", + " question_key=\"question\",\n", + " prediction_key=\"result\"\n", + " )\n", + "\n", + " return graded_outputs\n", + "\n", + "\n", + "def grade_model_retrieval(gt_dataset, predictions):\n", + " # Create an evaluation chain\n", + " eval_chain = QAEvalChain.from_llm(\n", + " llm=ChatOpenAI(model_name=\"gpt-4\", temperature=0),\n", + " prompt=GRADE_DOCS_PROMPT\n", + " )\n", + "\n", + " # Evaluate the predictions and ground truth using the evaluation chain\n", + " graded_outputs = eval_chain.evaluate(\n", + " gt_dataset,\n", + " predictions,\n", + " question_key=\"question\",\n", + " prediction_key=\"result\"\n", + " )\n", + " return graded_outputs" + ] + }, + { + "cell_type": 
"code", + "execution_count": 99, + "metadata": {}, + "outputs": [], + "source": [ + "bot_answers = []\n", + "source_docs = []\n", + "for d in yaml_data:\n", + " result = call_llm(d['question'])\n", + " bot_answers.append(result['result'])\n", + " source_docs.append(result['source_documents'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [], + "source": [ + "predictions = [{'result': a} for a in bot_answers]\n", + "\n", + "answer_grades = grade_model_retrieval(yaml_data, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [], + "source": [ + "retrieved_docs = []\n", + "for i, d in enumerate(yaml_data):\n", + " retrieved_doc_text = \"\"\n", + " for j, doc in enumerate(source_docs[i]):\n", + " retrieved_doc_text += \"Doc %s: \" % str(j + 1) + doc.page_content + \" \"\n", + " retrieved = {\"question\": d[\"question\"], \"answer\": d[\"answer\"], \"result\": retrieved_doc_text}\n", + " retrieved_docs.append(retrieved)\n", + "\n", + "retrieval_grades = grade_model_retrieval(yaml_data, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 109, "metadata": {}, "outputs": [ { "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "what's a scout?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "A Scout in Code4rena is a role that focuses on scoping and pre-audit intel. Currently, Scouts are hand-picked by the C4 team as it's a highly sensitive role." 
- ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
questionMava correct answer (True value)Bot answersAnswer similarity scoreRetrieval relevancy score
0Hi, how can I get backstage access?To get backstage access, you need to become a ...I'm sorry, but the provided context does not c...IncorrectIncorrect
1how long does it take until findings are relea...Based on the context provided, the findings fr...The audit report, which includes the findings,...CorrectCorrect
2When can I talk about findings?You can talk about your findings after the con...The context does not provide information on wh...IncorrectIncorrect
3How do I change my wallet address?To change your wallet address, follow these st...Unfortunately, due to some restrictions in Mor...IncorrectIncorrect
4What are scouts?In the context of Code4rena, Scouts are indivi...The text provided does not provide any informa...IncorrectIncorrect
5How long does the contest process usually take?Based on the provided context, the contest pro...The contest process, from the closing of audit...CorrectCorrect
6how does certification work?The certification process at Code4rena works i...Certification for wardens involves a process w...CorrectCorrect
7Can I use bots to analyze code?Yes, you can use bots to analyze code. In fact...Yes, you can use bots to analyze code. In fact...CorrectCorrect
8Can I use bots to analyze code?Yes, you can use bots to analyze code. In fact...Yes, you can use bots to analyze code. In fact...CorrectCorrect
9Can I use chatgpt?Yes, you can use bots to analyze code. This is...The provided context does not contain informat...IncorrectIncorrect
10What is a lookout?In the context provided, a lookout is a role i...A Lookout in the context of Code4rena is a rol...CorrectCorrect
\n", + "
" + ], + "text/plain": [ + " question \\\n", + "0 Hi, how can I get backstage access? \n", + "1 how long does it take until findings are relea... \n", + "2 When can I talk about findings? \n", + "3 How do I change my wallet address? \n", + "4 What are scouts? \n", + "5 How long does the contest process usually take? \n", + "6 how does certification work? \n", + "7 Can I use bots to analyze code? \n", + "8 Can I use bots to analyze code? \n", + "9 Can I use chatgpt? \n", + "10 What is a lookout? \n", + "\n", + " Mava correct answer (True value) \\\n", + "0 To get backstage access, you need to become a ... \n", + "1 Based on the context provided, the findings fr... \n", + "2 You can talk about your findings after the con... \n", + "3 To change your wallet address, follow these st... \n", + "4 In the context of Code4rena, Scouts are indivi... \n", + "5 Based on the provided context, the contest pro... \n", + "6 The certification process at Code4rena works i... \n", + "7 Yes, you can use bots to analyze code. In fact... \n", + "8 Yes, you can use bots to analyze code. In fact... \n", + "9 Yes, you can use bots to analyze code. This is... \n", + "10 In the context provided, a lookout is a role i... \n", + "\n", + " Bot answers Answer similarity score \\\n", + "0 I'm sorry, but the provided context does not c... Incorrect \n", + "1 The audit report, which includes the findings,... Correct \n", + "2 The context does not provide information on wh... Incorrect \n", + "3 Unfortunately, due to some restrictions in Mor... Incorrect \n", + "4 The text provided does not provide any informa... Incorrect \n", + "5 The contest process, from the closing of audit... Correct \n", + "6 Certification for wardens involves a process w... Correct \n", + "7 Yes, you can use bots to analyze code. In fact... Correct \n", + "8 Yes, you can use bots to analyze code. In fact... Correct \n", + "9 The provided context does not contain informat... 
Incorrect \n", + "10 A Lookout in the context of Code4rena is a rol... Correct \n", + "\n", + " Retrieval relevancy score \n", + "0 Incorrect \n", + "1 Correct \n", + "2 Incorrect \n", + "3 Incorrect \n", + "4 Incorrect \n", + "5 Correct \n", + "6 Correct \n", + "7 Correct \n", + "8 Correct \n", + "9 Incorrect \n", + "10 Correct " + ] + }, + "execution_count": 109, "metadata": {}, - "output_type": "display_data" - }, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.DataFrame({\n", + " \"question\": [d['question'] for d in yaml_data],\n", + " \"Mava correct answer (True value)\": [d['answer'] for d in yaml_data],\n", + " \"Bot answers\": [p['result'] for p in predictions],\n", + " \"Answer similarity score\": ['Incorrect' if 'Incorrect' in g['results'] else 'Correct' for g in answer_grades],\n", + " \"Retrieval relevancy score\": ['Incorrect' if 'Incorrect' in g['results'] else 'Correct' for g in retrieval_grades]\n", + "})\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "https://docs.code4rena.com/roles/certified-contributors/lookouts, https://docs.code4rena.com/structure/frequently-asked-questions, https://code4rena.com/how-it-works, https://code4rena.com/how-it-works\n" + "Bot Accuracy: 0.5454545454545454\n" ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "Am I allowed to use AI in an audit?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "Yes, you are allowed to use AI in an audit, but there are some restrictions. 
Code4rena runs a Bot Race at the start of each audit where wardens compete to see whose AI-driven bot can create the highest quality and most thorough audit report. The winning report is shared with all C4 wardens and all findings in the winning Bot Report will be declared publicly known issues, and therefore ineligible for awards. \n", - "\n", - "However, using the output of AI tools like ChatGPT, GPT-3, or other automated tools for audit submissions is highly discouraged as it often leads to a high ratio of nonsense submissions. If you use automated tools as a first pass, you will have a higher burden of proof for demonstrating a relevant High and Medium severity exploit path to be considered satisfactory." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, + } + ], + "source": [ + "print(f\"Bot Accuracy: {df['Answer similarity score'].value_counts()['Correct'] / len(df['Answer similarity score'])}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ { "data": { - "text/markdown": [ - "### Sources" - ], "text/plain": [ - "" + "55" ] }, + "execution_count": 108, "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://docs.code4rena.com/roles/wardens/submission-policy, https://docs.code4rena.com/awarding/fairness-and-validity, https://docs.code4rena.com/roles/wardens/submission-policy, https://docs.code4rena.com/awarding/incentive-model-and-awards\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "Can I change my Code4rena username?" 
- ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "No, you cannot change your Code4rena username. Once chosen, your username cannot be changed later." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://docs.code4rena.com/roles/wardens/warden-auth, https://code4rena.com/register, https://code4rena.com/register, https://code4rena.com/help\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "How do I book a solo audit?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "The process of booking a solo audit begins with a project team member clicking the \"Get a quote\" button on a warden's profile, and sharing scoping details with the Code4rena team. Code4rena staff will then consult with the warden and project team to firm up scoping, pricing, and dates." 
- ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://docs.code4rena.com/roles/wardens/solo-audits, https://code4rena.com/register, https://code4rena.com/register, https://docs.code4rena.com/structure/frequently-asked-questions\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "Do I need to be certified to participate in an audit?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "Yes, you need to be a Certified Contributor to participate in an audit according to the information provided. You can become a Certified Contributor by submitting the Certified Contributor Application form." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://docs.code4rena.com/roles/certified-contributors, https://docs.code4rena.com/roles/certified-contributors, https://docs.code4rena.com/roles/wardens, https://docs.code4rena.com/roles/wardens/solo-audits\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "How do bot races work?" 
- ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "I'm sorry, but the provided context does not contain any information about bot races." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://docs.code4rena.com/roles/judges, https://docs.code4rena.com/roles/judges/how-to-judge-a-contest, https://docs.code4rena.com/awarding/fairness-and-validity, https://docs.code4rena.com/roles/sponsors\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "Can I change my Code4rena profile name?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "The provided context does not include information on whether you can change your Code4rena profile name." 
- ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://docs.code4rena.com/roles/wardens/warden-auth, https://code4rena.com/help, https://code4rena.com/help, https://code4rena.com/contests/2023-05-chainlink-cross-chain-services-ccip-and-arm-network\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "What are scout awards?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "I'm sorry, but the provided context does not contain any information about \"scout awards\"." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://docs.code4rena.com/philosophy/security-is-about-people, https://docs.code4rena.com/roles/certified-contributors/lookouts, https://docs.code4rena.com/awarding/incentive-model-and-awards/awarding-process, https://docs.code4rena.com/awarding/judging-criteria\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "What are analysis reports?" 
- ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "Analysis reports are written submissions that outline the Wardens' analysis of the codebase as a whole, any observations or advice they have about architecture, mechanism, or approach, broader concerns like systemic risks or centralization risks, and the approach taken in reviewing the code. They also include new insights and learnings from the audit. These reports provide wardens with an opportunity to contribute value through high level insights and advice that aren't necessarily covered by specific bugs. Analyses are judged A/B/C, with the top Analysis selected for inclusion in the audit report." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://docs.code4rena.com/awarding/judging-criteria, https://docs.code4rena.com/awarding/incentive-model-and-awards, https://docs.code4rena.com/awarding/fairness-and-validity, https://docs.code4rena.com/awarding/incentive-model-and-awards\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "what is an analysis finding?" 
- ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "An analysis finding is a written submission that outlines the Wardens' analysis of the codebase as a whole and any observations or advice they have about architecture, mechanism, or approach. It also includes broader concerns like systemic risks or centralization risks, the approach taken in reviewing the code, and new insights and learnings from the audit. Analyses are judged A/B/C, with the top Analysis selected for inclusion in the audit report." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://docs.code4rena.com/awarding/judging-criteria, https://docs.code4rena.com/awarding/incentive-model-and-awards, https://docs.code4rena.com/awarding/fairness-and-validity, https://docs.code4rena.com/structure/frequently-asked-questions\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "My name wasn't in the award announcements. When can I check on my results?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "Based on the audit timeline provided by Code4rena Foundation, the judging QA is completed and awards are announced between Day 25-34 after audit submissions close. 
If your name wasn't in the award announcements, you may want to wait until this period is over. If you still don't see your award after this time, you may need to contact the foundation for further assistance." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://docs.code4rena.com/awarding/incentive-model-and-awards/awarding-process, https://docs.code4rena.com/roles/wardens/warden-auth, https://docs.code4rena.com/awarding/incentive-model-and-awards/qa-gas-report-faq, https://docs.code4rena.com/structure/our-process\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "How long does the certification process take?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "Once you submit the Certified Contributor Application form, Provenance typically emails you within one business day. If you have all the available documents, the process can usually be completed within a day. However, it will take longer if you need to assemble the necessary documents." 
- ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://docs.code4rena.com/roles/certified-contributors, https://docs.code4rena.com/structure/our-process, https://docs.code4rena.com/roles/certified-contributors, https://docs.code4rena.com/roles/wardens\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "How can I access findings.csv?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "I'm sorry, but the provided context does not contain information on how to access a file named \"findings.csv\"." 
- ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://docs.code4rena.com/structure/frequently-asked-questions, https://docs.code4rena.com/roles/wardens/submission-policy, https://docs.code4rena.com/roles/wardens/submission-policy, https://docs.code4rena.com/roles/wardens/warden-auth\n" - ] + "output_type": "execute_result" } ], - "source": [ - "for q in MAVA_MISANSWERED_QUES:\n", - " ask(q)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], "source": [] } ], From 7d8501b81d7c50fa328222391f3fdba63c9fbbc5 Mon Sep 17 00:00:00 2001 From: Sagar Shah Date: Wed, 13 Sep 2023 19:36:25 -0500 Subject: [PATCH 06/11] wip --- qa_bot/knowledge_base/c4/c4_test_qa.yaml | 20 - qa_bot/qa_bot.ipynb | 1828 +++++++++++++++++++--- 2 files changed, 1606 insertions(+), 242 deletions(-) diff --git a/qa_bot/knowledge_base/c4/c4_test_qa.yaml b/qa_bot/knowledge_base/c4/c4_test_qa.yaml index d5aec32..81e3520 100644 --- a/qa_bot/knowledge_base/c4/c4_test_qa.yaml +++ b/qa_bot/knowledge_base/c4/c4_test_qa.yaml @@ -78,26 +78,6 @@ This shows that bots can indeed be used to analyze code, and can even be quite effective at it. source: https://code4rena.com/register/bot -- question: Can I use bots to analyze code? - answer: | - Yes, you can use bots to analyze code. In fact, there are competitions called "Bot Races" where AI and automated tools are used as the first phase of a competitive audit. In these races, "Bot Crews" compete to see whose bot can create the highest quality and most thorough audit report. The process works in two stages: - - Stage 1: Qualifier Race - Bot Crews race to have their bots deliver the highest quality and most thorough report based on a repo provided at start time. 
- Qualifier Races are open for one hour. - Judges choose the Top 20 Bots for upcoming competitions. - - Stage 2: Bot Race - The details of this stage are not provided in the context, but it likely involves the top bots from the Qualifier Race competing in a more intensive code analysis challenge. - - This shows that bots can indeed be used to analyze code, and can even be quite effective at it. - source: https://code4rena.com/register/bot - -- question: Can I use chatgpt? - answer: | - Yes, you can use bots to analyze code. This is evident in the concept of "Bot Races" as described in the context. In these races, AI and automated tools, or "bots", are used in the first phase of a competitive audit. These bots compete to create the highest quality and most thorough audit report based on a provided repository. The bots that deliver the best reports are then chosen for upcoming competitions. This shows that bots can indeed be used to analyze code, and can even be used competitively to determine which bot can perform the most comprehensive analysis. - source: https://code4rena.com/register/bot - - question: What is a lookout? answer: | In the context provided, a lookout is a role in the Code4rena ecosystem. Lookouts review and organize submissions to Code4rena’s audits. They play a crucial part in ensuring the quality and organization of the audit submissions. 
diff --git a/qa_bot/qa_bot.ipynb b/qa_bot/qa_bot.ipynb index 5a9d511..15a6a78 100644 --- a/qa_bot/qa_bot.ipynb +++ b/qa_bot/qa_bot.ipynb @@ -685,7 +685,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 322, "metadata": {}, "outputs": [ { @@ -727,95 +727,87 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 271, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised APIError: OpenAI API returned an empty embedding.\n", - "Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised APIError: OpenAI API returned an empty embedding.\n" - ] - }, { "data": { "text/plain": [ - "['9f122ac2-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122bf8-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122c52-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122c8e-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122cca-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122cfc-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122d38-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122d6a-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122d9c-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122dce-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122e00-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122e32-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122e64-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122e96-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122ebe-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122ef0-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122f22-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122f54-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122f86-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122fb8-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f122fea-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f12301c-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f12304e-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123080-50d5-11ee-8d9d-367dda1ae1c5',\n", - " 
'9f1230b2-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f1230da-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f12310c-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f12313e-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f1231ac-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f1231de-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123210-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123242-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123274-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f1232a6-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f1232d8-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123300-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123332-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123364-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123396-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f1233be-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f1233f0-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123422-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123454-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123486-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f1234b8-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f1234e0-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123512-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123544-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123576-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f1235a8-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f1235d0-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123602-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123634-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123666-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123698-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f1236ca-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f1236fc-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123724-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123756-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123788-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f1237b0-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f1237e2-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123814-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123846-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f12386e-50d5-11ee-8d9d-367dda1ae1c5',\n", - " 
'9f1238a0-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f1238d2-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123904-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123936-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f123968-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f12399a-50d5-11ee-8d9d-367dda1ae1c5',\n", - " '9f1239c2-50d5-11ee-8d9d-367dda1ae1c5']" - ] - }, - "execution_count": 34, + "['72e47726-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e4782a-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47866-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47898-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e478c0-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e478e8-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47910-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47938-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47960-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47988-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e479b0-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e479d8-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47a00-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47a28-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47a50-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47a78-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47a96-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47abe-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47ae6-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47b0e-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47b36-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47b5e-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47b86-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47ba4-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47bcc-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47bf4-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47c1c-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47c44-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47c6c-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47c8a-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47cb2-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47cda-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47d02-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47d2a-5246-11ee-8d9d-367dda1ae1c5',\n", + " 
'72e47d52-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47d70-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47d98-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47dc0-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47de8-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47e10-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47e38-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47e60-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47e7e-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47ea6-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47ece-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47ef6-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47f1e-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47f3c-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47f64-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47f8c-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47fb4-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e47fdc-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e48004-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e4802c-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e4804a-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e48072-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e4809a-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e480c2-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e480ea-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e48112-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e48130-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e48158-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e48180-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e481a8-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e481d0-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e481f8-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e48220-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e48248-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e48270-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e4828e-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e482b6-5246-11ee-8d9d-367dda1ae1c5',\n", + " '72e482de-5246-11ee-8d9d-367dda1ae1c5']" + ] + }, + "execution_count": 271, "metadata": {}, "output_type": "execute_result" } @@ -828,10 +820,10 @@ "# NOTE: At times, OpenAI Embedding service can fail intermittently and 
return errorneous values such as [NaN], more info: https://github.com/langchain-ai/langchain/pull/7070\n", "\n", "embeddings = OpenAIEmbeddings()\n", - "vectorstore = Chroma(\"langchain_store\", embeddings)\n", + "vectorstore = Chroma(\"vectorstore_1\", embeddings, collection_metadata={\"hnsw:space\": \"cosine\"})\n", "\n", "vectorstore.add_documents(website_chunks)\n", - "# vectorstore.add_documents(docs_chunks)\n", + "#vectorstore.add_documents(docs_chunks)\n", "vectorstore.add_documents(gh_docs_chunks)\n" ] }, @@ -839,71 +831,135 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Retrieval QA chain" + "#### Retrieval Augmented Generation\n", + "Workflow \n", + "1. Use faster LLM (GPT-3.5) to generate 3 rephrased variants of the original user question to improve question quality which in-turn should improve retrieval\n", + "2. Use the rephrased question to generate the final answer using RAG" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Generate rephrased questions\n", + "Use faster LLM (GPT-3.5) to generate 3 rephrased variants of the original user question to improve question quality which in-turn should improve retrieval" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 363, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['What is the meaning of scout awards?',\n", + " 'Can you explain what scout awards are?',\n", + " 'Could you provide a description of scout awards?']" + ] + }, + "execution_count": 363, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "from langchain.chains import RetrievalQA\n", + "from langchain.chains import LLMChain\n", "from langchain.chat_models import ChatOpenAI\n", + "from langchain.prompts import PromptTemplate\n", "\n", - "qa = RetrievalQA.from_chain_type(llm=ChatOpenAI(model_name=\"gpt-4\", temperature=0), chain_type=\"stuff\", retriever=vectorstore.as_retriever(), 
return_source_documents=True)\n", + "prompt_template = \"\"\"You are a teacher who is helping a student ask the right questions about a service so that they can look in the most relevant places to find the answer. \n", + "# INSTRUCTIONS\n", + "- You are given student's question below\n", + "- Using the original question, generate 3 alternative questions that are rephrased to be not vague or ambiguous so as to clearly convey the same meaning and context as the original question\n", + "- Return the final result as a JSON object containing a list of rephrased questions as \"new_questions\" field\n", "\n", + "# QUESTION\n", + "{question}\n", "\n", - "def call_llm(question):\n", - " result = qa({\"query\": question})\n", - " return result\n", - " \n", + "# RESULT\n", + "\"\"\"\n", "\n", - "def ask(question):\n", - " result = call_llm(question)\n", - " display(Markdown(f\"### Question\"))\n", - " display(Markdown(question))\n", "\n", - " display(Markdown(f\"### Answer\"))\n", - " display(Markdown(result[\"result\"]))\n", + "def generate_rephrased_questions(question):\n", + " chat = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", + " llm_chain = LLMChain(llm=chat, prompt=PromptTemplate.from_template(prompt_template))\n", "\n", - " display(Markdown(f\"### Sources\"))\n", - " sources = [r.metadata['url'] if 'url' in r.metadata else r.metadata['source'] for r in result[\"source_documents\"] ]\n", - " print(\", \".join(sources))" + " result = llm_chain(inputs={\"question\": question}, return_only_outputs=True)\n", + " result_dict = json.loads(result['text'])\n", + " new_questions = result_dict['new_questions']\n", + " return new_questions\n", + "\n", + "generate_rephrased_questions(\"What are scout awards?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Generate final answer using RAG" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 296, "metadata": {}, "outputs": [], "source": [ - "# Questions that 
were answered incorrectly by the Mava bot as per emoji reaction in the test channel\n", - "MAVA_MISANSWERED_QUES = [\n", - " \"what's a scout?\",\n", - " \"Am I allowed to use AI in an audit?\",\n", - " \"Can I change my Code4rena username?\",\n", - " \"How do I book a solo audit?\",\n", - " \"Do I need to be certified to participate in an audit?\",\n", - " \"How do bot races work?\",\n", - " \"Can I change my Code4rena profile name?\",\n", - " \"What are scout awards?\",\n", - " \"What are analysis reports?\",\n", - " \"what is an analysis finding?\",\n", - " \"My name wasn't in the award announcements. When can I check on my results?\",\n", - " \"How long does the certification process take?\",\n", - " \"How can I access findings.csv?\"\n", - "]" + "def display_result(question, result):\n", + " display(Markdown(f\"### Question\"))\n", + " display(Markdown(\"ORIGINAL: \" + question))\n", + " display(Markdown(\"REPHRASED: \" + f\"{result['rephrased_question'] if result['rephrased_question'] else 'None'}\"))\n", + "\n", + " display(Markdown(f\"### Answer\"))\n", + " display(Markdown(result[\"result\"]))\n", + "\n", + " display(Markdown(f\"### Sources\"))\n", + " sources = [r.metadata['url'] if 'url' in r.metadata else r.metadata['source'] for r in result[\"source_documents\"] ]\n", + " print(\", \".join(sources))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 354, "metadata": {}, "outputs": [], "source": [ - "# for q in MAVA_MISANSWERED_QUES:\n", - "# ask(q)" + "from langchain.chains import RetrievalQA\n", + "from langchain.chat_models import ChatOpenAI\n", + "\n", + "qa = RetrievalQA.from_chain_type(llm=ChatOpenAI(model_name=\"gpt-4\", temperature=0), chain_type=\"stuff\", retriever=vectorstore.as_retriever(), return_source_documents=True)\n", + "\n", + "\n", + "def call_llm(question, use_rephrased_questions=True):\n", + " if not use_rephrased_questions:\n", + " result = qa({\"query\": question})\n", + " result['rephrased_question'] = 
None\n", + " return result\n", + "\n", + "\n", + " # Get rephrased questions\n", + " rephrased_questions = generate_rephrased_questions(question)\n", + "\n", + " # Attempt each question until a valid result is found\n", + " for q in rephrased_questions:\n", + " result = qa({\"query\": q})\n", + " answer = result['result']\n", + " result['rephrased_question'] = None\n", + " \n", + " # If the model is unable to find an answer, it returns 'sorry' in the response, we try again with a different question\n", + " if 'sorry' in answer.lower():\n", + " continue\n", + " else:\n", + " result['rephrased_question'] = q\n", + " break\n", + "\n", + " return result\n", + " \n", + "\n", + "def ask(question, use_rephrased_questions=True):\n", + " result = call_llm(question, use_rephrased_questions)\n", + " display_result(question, result)\n" ] }, { @@ -916,7 +972,7 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 228, "metadata": {}, "outputs": [], "source": [ @@ -933,7 +989,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 229, "metadata": {}, "outputs": [], "source": [ @@ -991,7 +1047,7 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 230, "metadata": {}, "outputs": [], "source": [ @@ -1035,7 +1091,7 @@ }, { "cell_type": "code", - "execution_count": 99, + "execution_count": 231, "metadata": {}, "outputs": [], "source": [ @@ -1049,7 +1105,7 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": 232, "metadata": {}, "outputs": [], "source": [ @@ -1060,7 +1116,7 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 233, "metadata": {}, "outputs": [], "source": [ @@ -1077,7 +1133,7 @@ }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 234, "metadata": {}, "outputs": [ { @@ -1104,8 +1160,8 @@ " question\n", " Mava correct answer (True value)\n", " Bot answers\n", - " Answer similarity score\n", " Retrieval relevancy score\n", + " Answer 
similarity score\n", " \n", " \n", " \n", @@ -1113,15 +1169,15 @@ " 0\n", " Hi, how can I get backstage access?\n", " To get backstage access, you need to become a ...\n", - " I'm sorry, but the provided context does not c...\n", - " Incorrect\n", - " Incorrect\n", + " To obtain +Backstage access, you need to meet ...\n", + " Correct\n", + " Correct\n", " \n", " \n", " 1\n", " how long does it take until findings are relea...\n", " Based on the context provided, the findings fr...\n", - " The audit report, which includes the findings,...\n", + " The findings are released and made public when...\n", " Correct\n", " Correct\n", " \n", @@ -1129,7 +1185,7 @@ " 2\n", " When can I talk about findings?\n", " You can talk about your findings after the con...\n", - " The context does not provide information on wh...\n", + " The appropriate time to discuss findings is af...\n", " Incorrect\n", " Incorrect\n", " \n", @@ -1137,23 +1193,23 @@ " 3\n", " How do I change my wallet address?\n", " To change your wallet address, follow these st...\n", - " Unfortunately, due to some restrictions in Mor...\n", - " Incorrect\n", - " Incorrect\n", + " To update your wallet address, follow these st...\n", + " Correct\n", + " Correct\n", " \n", " \n", " 4\n", " What are scouts?\n", " In the context of Code4rena, Scouts are indivi...\n", - " The text provided does not provide any informa...\n", - " Incorrect\n", - " Incorrect\n", + " Scouts are individuals who focus on scoping an...\n", + " Correct\n", + " Correct\n", " \n", " \n", " 5\n", " How long does the contest process usually take?\n", " Based on the provided context, the contest pro...\n", - " The contest process, from the closing of audit...\n", + " The typical duration of the contest process, f...\n", " Correct\n", " Correct\n", " \n", @@ -1161,7 +1217,7 @@ " 6\n", " how does certification work?\n", " The certification process at Code4rena works i...\n", - " Certification for wardens involves a process w...\n", + " To obtain 
certification, you need to submit th...\n", " Correct\n", " Correct\n", " \n", @@ -1169,31 +1225,15 @@ " 7\n", " Can I use bots to analyze code?\n", " Yes, you can use bots to analyze code. In fact...\n", - " Yes, you can use bots to analyze code. In fact...\n", + " Yes, it is possible to utilize bots for code a...\n", " Correct\n", " Correct\n", " \n", " \n", " 8\n", - " Can I use bots to analyze code?\n", - " Yes, you can use bots to analyze code. In fact...\n", - " Yes, you can use bots to analyze code. In fact...\n", - " Correct\n", - " Correct\n", - " \n", - " \n", - " 9\n", - " Can I use chatgpt?\n", - " Yes, you can use bots to analyze code. This is...\n", - " The provided context does not contain informat...\n", - " Incorrect\n", - " Incorrect\n", - " \n", - " \n", - " 10\n", " What is a lookout?\n", " In the context provided, a lookout is a role i...\n", - " A Lookout in the context of Code4rena is a rol...\n", + " A Lookout is a role in Code4rena's competition...\n", " Correct\n", " Correct\n", " \n", @@ -1202,60 +1242,52 @@ "" ], "text/plain": [ - " question \\\n", - "0 Hi, how can I get backstage access? \n", - "1 how long does it take until findings are relea... \n", - "2 When can I talk about findings? \n", - "3 How do I change my wallet address? \n", - "4 What are scouts? \n", - "5 How long does the contest process usually take? \n", - "6 how does certification work? \n", - "7 Can I use bots to analyze code? \n", - "8 Can I use bots to analyze code? \n", - "9 Can I use chatgpt? \n", - "10 What is a lookout? \n", + " question \\\n", + "0 Hi, how can I get backstage access? \n", + "1 how long does it take until findings are relea... \n", + "2 When can I talk about findings? \n", + "3 How do I change my wallet address? \n", + "4 What are scouts? \n", + "5 How long does the contest process usually take? \n", + "6 how does certification work? \n", + "7 Can I use bots to analyze code? \n", + "8 What is a lookout? 
\n", "\n", - " Mava correct answer (True value) \\\n", - "0 To get backstage access, you need to become a ... \n", - "1 Based on the context provided, the findings fr... \n", - "2 You can talk about your findings after the con... \n", - "3 To change your wallet address, follow these st... \n", - "4 In the context of Code4rena, Scouts are indivi... \n", - "5 Based on the provided context, the contest pro... \n", - "6 The certification process at Code4rena works i... \n", - "7 Yes, you can use bots to analyze code. In fact... \n", - "8 Yes, you can use bots to analyze code. In fact... \n", - "9 Yes, you can use bots to analyze code. This is... \n", - "10 In the context provided, a lookout is a role i... \n", + " Mava correct answer (True value) \\\n", + "0 To get backstage access, you need to become a ... \n", + "1 Based on the context provided, the findings fr... \n", + "2 You can talk about your findings after the con... \n", + "3 To change your wallet address, follow these st... \n", + "4 In the context of Code4rena, Scouts are indivi... \n", + "5 Based on the provided context, the contest pro... \n", + "6 The certification process at Code4rena works i... \n", + "7 Yes, you can use bots to analyze code. In fact... \n", + "8 In the context provided, a lookout is a role i... \n", "\n", - " Bot answers Answer similarity score \\\n", - "0 I'm sorry, but the provided context does not c... Incorrect \n", - "1 The audit report, which includes the findings,... Correct \n", - "2 The context does not provide information on wh... Incorrect \n", - "3 Unfortunately, due to some restrictions in Mor... Incorrect \n", - "4 The text provided does not provide any informa... Incorrect \n", - "5 The contest process, from the closing of audit... Correct \n", - "6 Certification for wardens involves a process w... Correct \n", - "7 Yes, you can use bots to analyze code. In fact... Correct \n", - "8 Yes, you can use bots to analyze code. In fact... 
Correct \n", - "9 The provided context does not contain informat... Incorrect \n", - "10 A Lookout in the context of Code4rena is a rol... Correct \n", + " Bot answers \\\n", + "0 To obtain +Backstage access, you need to meet ... \n", + "1 The findings are released and made public when... \n", + "2 The appropriate time to discuss findings is af... \n", + "3 To update your wallet address, follow these st... \n", + "4 Scouts are individuals who focus on scoping an... \n", + "5 The typical duration of the contest process, f... \n", + "6 To obtain certification, you need to submit th... \n", + "7 Yes, it is possible to utilize bots for code a... \n", + "8 A Lookout is a role in Code4rena's competition... \n", "\n", - " Retrieval relevancy score \n", - "0 Incorrect \n", - "1 Correct \n", - "2 Incorrect \n", - "3 Incorrect \n", - "4 Incorrect \n", - "5 Correct \n", - "6 Correct \n", - "7 Correct \n", - "8 Correct \n", - "9 Incorrect \n", - "10 Correct " - ] - }, - "execution_count": 109, + " Retrieval relevancy score Answer similarity score \n", + "0 Correct Correct \n", + "1 Correct Correct \n", + "2 Incorrect Incorrect \n", + "3 Correct Correct \n", + "4 Correct Correct \n", + "5 Correct Correct \n", + "6 Correct Correct \n", + "7 Correct Correct \n", + "8 Correct Correct " + ] + }, + "execution_count": 234, "metadata": {}, "output_type": "execute_result" } @@ -1267,22 +1299,22 @@ " \"question\": [d['question'] for d in yaml_data],\n", " \"Mava correct answer (True value)\": [d['answer'] for d in yaml_data],\n", " \"Bot answers\": [p['result'] for p in predictions],\n", - " \"Answer similarity score\": ['Incorrect' if 'Incorrect' in g['results'] else 'Correct' for g in answer_grades],\n", - " \"Retrieval relevancy score\": ['Incorrect' if 'Incorrect' in g['results'] else 'Correct' for g in retrieval_grades]\n", + " \"Retrieval relevancy score\": ['Incorrect' if 'Incorrect' in g['results'] else 'Correct' for g in retrieval_grades],\n", + " \"Answer similarity score\": 
['Incorrect' if 'Incorrect' in g['results'] else 'Correct' for g in answer_grades]\n", "})\n", "df" ] }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 235, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Bot Accuracy: 0.5454545454545454\n" + "Bot Accuracy: 0.8888888888888888\n" ] } ], @@ -1290,22 +1322,1374 @@ "print(f\"Bot Accuracy: {df['Answer similarity score'].value_counts()['Correct'] / len(df['Answer similarity score'])}\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### HyDE technique\n", + "This technique can help improve information retrieval\n", + "\n", + "https://python.langchain.com/docs/use_cases/question_answering/how_to/hyde" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vectorstore_hyde = Chroma(\"store_hyde_1\", embeddings, collection_metadata={\"hnsw:space\": \"cosine\"})\n", + "vectorstore_hyde.add_documents(website_chunks)\n", + "vectorstore_hyde.add_documents(gh_docs_chunks)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.vectorstores.base import VectorStoreRetriever\n", + "from langchain.callbacks.manager import (\n", + " AsyncCallbackManagerForRetrieverRun,\n", + " CallbackManagerForRetrieverRun,\n", + ")\n", + "from langchain.docstore.document import Document\n", + "from typing import List\n", + "\n", + "class HydeRetriever(VectorStoreRetriever):\n", + "\n", + " def _get_relevant_documents(\n", + " self, query: str, *, run_manager: CallbackManagerForRetrieverRun\n", + " ) -> List[Document]:\n", + " llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", + "\n", + " web_search_template = \"\"\"Please write a passage to answer the question \n", + " Question: {QUESTION}\n", + " Passage:\"\"\"\n", + "\n", + " web_search = PromptTemplate(template=web_search_template, 
input_variables=[\"QUESTION\"])\n", + "\n", + " llm_chain = LLMChain(llm=llm, prompt=web_search)\n", + "\n", + " result = llm_chain(inputs={\"QUESTION\": query}, return_only_outputs=True)\n", + " hyquery = result['text']\n", + "\n", + " return super()._get_relevant_documents(hyquery, run_manager=run_manager)\n", + "\n", + "\n", + "hyde_retriever = HydeRetriever(vectorstore=vectorstore_hyde)\n", + "\n", + "hyde_retriever.get_relevant_documents(\"How can I access findings.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 299, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "qa = RetrievalQA.from_chain_type(llm=ChatOpenAI(model_name=\"gpt-4\", temperature=0), chain_type=\"stuff\", retriever=hyde_retriever, return_source_documents=True)\n", + "\n", + "\n", + "def call_hyde_llm(question):\n", + " result = qa({\"query\": question})\n", + " result['rephrased_question'] = None\n", + " return result\n", + "\n", + "def ask_hyde(question):\n", + " result = call_hyde_llm(question)\n", + " display_result(question, result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Final Implementation" + ] + }, + { + "cell_type": "code", + "execution_count": 325, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.embeddings import OpenAIEmbeddings\n", + "from langchain.vectorstores import Chroma\n", + "\n", + "# NOTE: At times, OpenAI Embedding service can fail intermittently and return errorneous values such as [NaN], more info: https://github.com/langchain-ai/langchain/pull/7070\n", + "\n", + "embeddings = OpenAIEmbeddings()\n", + "vectorstore_with_sources = Chroma(\"vectorstore_with_sources3\", embeddings, collection_metadata={\"hnsw:space\": \"cosine\"})\n", + "\n", + "for i, d in enumerate(website_chunks):\n", + " d.metadata['source'] = f\"w{i}-pl\"\n", + " vectorstore_with_sources.add_documents([d])\n", + "\n", + "for i, d in enumerate(gh_docs_chunks):\n", + " local_path = d.metadata['source']\n", + " 
d.metadata['source'] = f\"g{i}-pl\"\n", + " d.metadata['url'] = f\"{local_path.replace(C4_GH_DOCS_STORAGE_DIR, 'https://github.com/code-423n4/docs/blob/main/')}\"\n", + " vectorstore_with_sources.add_documents([d])" + ] + }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 372, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain.retrievers.multi_query:Generated queries: ['1. Can you provide information about the different types of scout awards?', '2. How can I learn more about the various scout awards available?', '3. What is the significance of scout awards and how can one earn them?']\n" + ] + }, { "data": { "text/plain": [ - "55" + "[Document(page_content=\"### How can I become a Judge?\\n\\nComplete [this form](https://code4rena.com/judge-application/) and share: Short bio/intro and summary of relevant experience, links that help demonstrate your expertise, 3 example submissions to Code4rena contests that were judged high severity, description of how each submission demonstrates your depth of knowledge.\\n\\n### How can I become a Scout?\\n\\nYou can’t, just yet! Right now, Scouts are hand-picked by the C4 team as it’s a highly sensitive role. We’re looking at the possibility of opening up this process, but not in the near future.\\n\\n### I want Code4rena to audit my project, where do I start?\\n\\nIt’s really simple! Just visit [this link](https://code4rena.typeform.com/i-want-an-audit) and fill out the form. Our team will be in touch with you shortly after you’ve completed it.\\n\\n### Do you have a blog?\\n\\nWe do indeed, [here](https://medium.com/code-423n4). We post product updates, sponsor interviews and more.\\n\\n### What’s the best way to stay up to date with Code4rena?\\n\\n[Follow us](https://twitter.com/code4rena) on Twitter and turn on notifications in our #announcements channel in [Discord](https://discord.gg/code4rena). 
\\n\\n### Where can I view the audit results?\\n\\nOnce an audit's results have been finalized, they’ll be shared in our #announcements channel in Discord. The audit's page in the ‘[Audits](https://code4rena.com/contests)’ section on our website will also be updated to show results.\\n\\n### Where can I read past Code4rena audit reports?\\n\\nWe push all public audit reports to the ‘[Reports](https://code4rena.com/reports)’ section on our website. This enables you to read through past findings, evaluate commonalities, and continue your learning journey. Our [GitHub](https://github.com/code-423n4/) repos are also public, for those of you interested in diving deeper.\", metadata={'source': 'g68-pl', 'url': 'https://github.com/code-423n4/docs/blob/main//structure/frequently-asked-questions.md'}),\n", + " Document(page_content='#### Can I see some examples of how awards work?\\n\\nAwards for each contest are [posted on the Code4rena website](https://code4rena.com/contests). See [Numoen](https://code4rena.com/contests/2023-01-numoen-findings), for example. 
The award calculation for Numoen had the following parameters:\\n\\n* **Total awards: 50,000 USDC**\\n* Main award pool: 42,500 USDC\\n* QA pool: 5,000 USDC\\n* Gas pool: 2,500 USDC\\n\\nThe table below shows each unique high and medium severity finding (`H-XX`, `M-XX`), QA report (`Q-XX`), gas optimization report (`G-XX`), and the way each submission’s award was calculated:\\n\\n* `pie` is the number of shares assigned to that report or finding\\n* `split` is the number of times those shares were divided\\n* `slice` is the number of shares assigned for that warden’s finding\\n* each `award` is calculated by `shares * (pot / number_of_shares)`\\n\\n**Tribe Turbo awards**', metadata={'source': 'g22-pl', 'url': 'https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/curve-logic.md'}),\n", + " Document(page_content='# Incentive model and awards\\n\\nTo incentivize **wardens**, C4 uses a unique scoring system with two primary goals: reward contestants for finding unique bugs and also to make the audit resistant to Sybil attack. 
A secondary goal of the scoring system is to encourage contestants to form teams and collaborate.\\n\\n**Judges** are incentivized to review findings and decide their severity, validity, and quality by receiving a share of the prize pool themselves.\\n\\n## High and Medium Risk bugs\\n\\nContestants are given shares for bugs discovered based on severity, and those shares give the owner a pro rata piece of the pot:\\n\\n`Med Risk Shares: 3 * (0.9 ^ (findingCount - 1)) / findingCount`\\\\\\n`High Risk Shares: 10 * (0.9 ^ (findingCount - 1)) / findingCount`\\n\\nFindingCount represents the number of findings for a same specific bug.\\nPlease note that findings with partial credit as still count as 1 finding in the algorithm\\n\\nDuring awarding, each share is redeemed for: `pot / number of shares`.\\n\\n### Bonus for best / selected for report\\n\\nFor each unique High or Medium finding, the submission selected for inclusion in the audit report receives a 30% share bonus.\\n\\n### Duplicates getting partial credit\\n\\nAll issues which identify the same functional vulnerability will be considered duplicates regardless of effective rationalization of severity or exploit path.\\n\\nHowever, any submissions which do not identify or effectively rationalize the top identified severity case may be judged as “partial credit” and may have their shares in that finding’s pie divided by 2 or 4 at judge’s sole discretion (e.g. 50% or 25% of the shares of a satisfactory submission in the duplicate set).', metadata={'source': 'g17-pl', 'url': 'https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/README.md'}),\n", + " Document(page_content='---\\ndescription: >-\\n This is a high level overview of the C4 awarding process. \\n---\\n\\n# Awarding process\\n\\nAt the conclusion of an audit, sponsors review wardens’ findings and express their opinions with regard to severity of issues. 
Judges evaluate input from both and make the ultimate decision in terms of severity and validity of issues. (See [How to judge an audit](../../roles/judges/how-to-judge-a-contest.md) for more detail.)\\n\\nIn making their determination, judges add labels to Github issues, while the original submission data (including the warden\\'s proposed severity rating) is preserved via a JSON data file. \\n\\nThe judge\\'s decisions are reviewed by the sponsoring project team and by [+backstage wardens](https://docs.code4rena.com/roles/certified-contributors/backstage-wardens) via a 48-hour QA process, to ensure fairness and quality. \\n\\nJudging data is used to generate the awards using Code4rena\\'s award calculation script, which factors in:\\n\\n- Risk level\\n- Validity\\n- Number of duplicates\\n- Grade (A, B, C; Satisfactory/Unsatisfactory)\\n- In some cases, \"partial duplicate\" status\\n\\n It should be possible to reverse engineer awards using a combination of two CSV files:\\n \\n - [`findings.csv`](https://code4rena.com/community-resources/findings.csv): valid Code4rena findings\\n - [`contests.csv`](https://code4rena.com/community-resources/contests.csv): Code4rena audits \\n\\nOnce awards are determined, we generate a CSV file enumerating funds to be sent. Distribution is then initiated using disperse.app and sent to multisig signers for completion of payment.', metadata={'source': 'g13-pl', 'url': 'https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/awarding-process.md'}),\n", + " Document(page_content='### Bug bounties vs C4 audits\\n\\n| Bug bounties | Competitive audits |\\n| ------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |\\n| **Spec work.** No way to have confidence that the time invested will produce a payout. 
| **Guaranteed payouts.** Auditors know it’s highly likely they can find a bug that will make it worth their time. |\\n| **Dark forest.** Who knows how much competition there is right now? Or how mature the codebase is? | **Low-hanging fruit.** If a project is seeking an audit, it’s likely fresh code with clear opportunities to dig in. |\\n| **Grow on your own.** Researchers have to proactively look for ways to learn and level up their skills. | **Learning community.** Open, competitive audits let auditors compare everyone’s findings and learn new things every single week. |\\n| **Paradox of choice.** So many projects have bounties. How does an auditor choose which to focus on? | **Less FOMO.** C4 runs a handful of active audits at a time and wardens can RSVP to signal to each other which audits have more participants. |', metadata={'source': 'g2-pl', 'url': 'https://github.com/code-423n4/docs/blob/main//README.md'}),\n", + " Document(page_content='### Getting an audit is courageous\\n\\nAn audit is an investment in a better long-term outcome for both the project undergoing the audit as well as its users and the community as a whole. \\n\\nIt requires tremendous courage to ask someone to look closely and find as many places where code could be improved. \\n\\nBecause of this, we treat sponsor projects with respect. We consider every finding discovered in an audit contest as a tool that we can use in the future to help others learn and better understand smart contract vulnerabilities. \\n\\n### Both wardens and sponsors have a voice in the process\\n\\nWe take sponsors’ feelings into consideration, working to help find ways to communicate and disclose the results of audits in a way that works as best as we can to eliminate the “blame and shame” approach and replace it with one that honors sponsors’ contribution toward making their project and the DeFi space more secure for everyone.\\n\\nOur contest process is transparent, with all issues reviewable on GitHub. 
We put an impartial judge in the position of listening to all players’ positions on a given issue and making a final determination.\\n\\n### We are a community\\n\\nWe trust that a community-driven approach with valuable incentives ensures enough coverage to give sponsors a meaningful and valuable audit without putting the burden on any one person to do a ’good enough job’ catching every bug—a burden that burns out traditional auditors.\\n\\nOur community chose the wolf as our symbol, and it’s fitting. Wolves are highly collaborative creatures who hunt in packs.\\n\\nRather than putting the pressure on individual auditors to catch all the vulnerabilities in a project, we ask competitors to catch the bugs that they can, and trust that the community will show up and contribute.', metadata={'source': 'g63-pl', 'url': 'https://github.com/code-423n4/docs/blob/main//philosophy/security-is-about-people.md'})]" ] }, - "execution_count": 108, + "execution_count": 372, "metadata": {}, "output_type": "execute_result" } ], + "source": [ + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.retrievers.multi_query import MultiQueryRetriever\n", + "\n", + "question = \"What are scout awards?\"\n", + "llm = ChatOpenAI(temperature=0)\n", + "retriever_from_llm = MultiQueryRetriever.from_llm(\n", + " retriever=vectorstore_with_sources.as_retriever(), llm=llm\n", + ")\n", + "import logging\n", + "\n", + "logging.basicConfig()\n", + "logging.getLogger(\"langchain.retrievers.multi_query\").setLevel(logging.INFO)\n", + "\n", + "unique_docs = retriever_from_llm.get_relevant_documents(query=question)\n", + "unique_docs" + ] + }, + { + "cell_type": "code", + "execution_count": 357, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import RetrievalQAWithSourcesChain\n", + "from langchain.chat_models import ChatOpenAI\n", + "\n", + "\n", + "model = ChatOpenAI(model_name=\"gpt-4\", temperature=0)\n", + "\n", + "qa = 
RetrievalQAWithSourcesChain.from_chain_type(model, chain_type=\"stuff\", retriever=vectorstore_with_sources.as_retriever(), return_source_documents=True)\n", + "\n", + "\n", + "def run_qa_with_sources(question, use_rephrased_questions=False):\n", + "\n", + " rephrased_question = None\n", + "\n", + " if not use_rephrased_questions:\n", + " result = qa({\"question\": question}, return_only_outputs=True)\n", + " else:\n", + " \n", + " rephrased_questions = generate_rephrased_questions(question)\n", + "\n", + " # Attempt each question until a valid result is found\n", + " for q in rephrased_questions:\n", + " result = qa({\"question\": q}, return_only_outputs=True) \n", + " # If the model is unable to find an answer, it returns 'sorry' in the response, we try again with a different question\n", + " if 'sorry' in result['answer'].lower():\n", + " continue\n", + " else:\n", + " rephrased_question = q\n", + " break\n", + "\n", + " answer = result['answer']\n", + " source_ids = result['sources']\n", + " source_docs = result['source_documents']\n", + "\n", + " source_urls = set()\n", + " for d in source_docs:\n", + " metadata = d.metadata\n", + " source_id = metadata['source']\n", + " url = metadata['url']\n", + " if source_id in source_ids:\n", + " source_urls.add(url)\n", + " return dict(answer=answer, source_urls=source_urls, rephrased_question=rephrased_question)\n", + "\n", + "def ask_with_sources(question, use_rephrased_questions=False):\n", + " result = run_qa_with_sources(question, use_rephrased_questions)\n", + "\n", + " display(Markdown(f\"### Question\"))\n", + " display(Markdown(\"ORIGINAL: \" + question))\n", + " display(Markdown(\"REPHRASED: \" + f\"{result['rephrased_question'] if 'rephrased_question' in result else 'None'}\"))\n", + "\n", + " display(Markdown(f\"### Answer\"))\n", + " display(Markdown(result[\"answer\"]))\n", + "\n", + " display(Markdown(f\"### Sources\"))\n", + " print(\", \".join(result['source_urls']))" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 360, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "ORIGINAL: what's a scout?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "REPHRASED: What is the definition of a scout?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "A scout in the context of Code4rena is a highly sensitive role that is currently hand-picked by the C4 team. The process of becoming a scout is not open to the public at this time.\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://github.com/code-423n4/docs/blob/main//structure/frequently-asked-questions.md\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "ORIGINAL: Am I allowed to use AI in an audit?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "REPHRASED: Can I utilize AI technology during an audit?" 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "Yes, you can utilize AI technology during an audit. However, using the output of AI tools like ChatGPT, GPT-3, or other automated tools for audit submissions is discouraged as it can lead to a high ratio of nonsense submissions. Automated tools can be used as a first pass, and findings from these tools can be built upon to identify high and medium severity issues. Code4rena also runs a Bot Race at the start of each audit, where AI-driven bots compete to create the highest quality and most thorough audit report. The winning report is shared with all wardens and all findings in the winning Bot Report are declared publicly known issues, and therefore ineligible for awards.\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://github.com/code-423n4/docs/blob/main//roles/wardens/submission-policy.md, https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/README.md\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "ORIGINAL: Can I change my Code4rena username?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "REPHRASED: Is it possible to modify my Code4rena username?" 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "The documents do not provide information on whether it is possible to modify a Code4rena username.\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://github.com/code-423n4/docs/blob/main//roles/wardens/warden-auth.md, https://code4rena.com/help\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "ORIGINAL: How do I book a solo audit?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "REPHRASED: What is the process for reserving a solo audit?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "The process for reserving a solo audit involves the following steps:\n", + "\n", + "1. A project team member clicks the “Get a quote” button on a warden’s profile, and shares scoping details with the Code4rena team.\n", + "2. 
Code4rena staff then consult with the warden and project team to firm up scoping, pricing, and dates.\n", + "\n", + "When a solo audit is booked via a warden’s “Available for hire” function, both the warden and the project have direct access to Code4rena staff for assistance with legal agreements, scoping, pricing, and logistical planning, collection and disbursement of funds, and external accountability via a Code4rena Judge.\n", + "\n", + "If you are a Certified warden, you can make yourself available for solo audits by logging in to your user account on the Code4rena website, and opting in to the “Available for hire” function on the Settings screen.\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://github.com/code-423n4/docs/blob/main//roles/wardens/solo-audits.md\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "ORIGINAL: Do I need to be certified to participate in an audit?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "REPHRASED: Is certification required to take part in an audit?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "Yes, certification is required to take part in an audit. 
Certified Wardens have the opportunity to book solo audits through Code4rena.\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://github.com/code-423n4/docs/blob/main//roles/certified-contributors/README.md, https://github.com/code-423n4/docs/blob/main//roles/wardens/solo-audits.md\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "ORIGINAL: How do bot races work?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "REPHRASED: What is the process of bot races?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "The process of bot races involves two stages. In the first stage, known as the Qualifier Race, Bot Crews compete to have their bots deliver the highest quality and most thorough report based on a repo provided at start time. This stage is open for one hour, and judges choose the Top 20 Bots for upcoming competitions. In the second stage, known as the Bot Race, the first hour of each audit competition is dedicated to a Bot Race. Bot Race submissions are assessed by Lookouts, and graded A/B/C similarly to other reports based on report quality, validity, and thoroughness. The best Bot Race report will receive a 30% share bonus. The top report will be shared in the competition's channel and will be considered the official source for known issue submissions. 
The winning bot report is selected and shared with all wardens within 24 hours of the audit start time. The full set of issues identified by the best automated tools are considered out of scope for the audit and ineligible for awards.\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://code4rena.com/how-it-works, https://code4rena.com/register/bot, https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/README.md\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "ORIGINAL: Can I change my Code4rena profile name?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "REPHRASED: Is it possible to modify my Code4rena profile name?" 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "The documents do not provide information on whether it is possible to modify a Code4rena profile name.\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://code4rena.com/register, https://code4rena.com/contests/2023-05-chainlink-cross-chain-services-ccip-and-arm-network, https://code4rena.com/contests/2023-01-reserve-contest, https://code4rena.com/help\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "ORIGINAL: What are scout awards?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "REPHRASED: What is the meaning of scout awards?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "Scout awards in the context of the Code4rena (C4) community are incentives given to wardens (participants) who find unique bugs in a project undergoing an audit. The awarding process involves judges who evaluate the severity and validity of the issues found. 
The awards are calculated based on several factors including risk level, validity, number of duplicates, grade (A, B, C; Satisfactory/Unsatisfactory), and in some cases, \"partial duplicate\" status. For each unique High or Medium finding, the submission selected for inclusion in the audit report receives a 30% share bonus. All issues which identify the same functional vulnerability are considered duplicates, but may receive partial credit. The awards are then distributed to the wardens.\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/curve-logic.md, https://github.com/code-423n4/docs/blob/main//philosophy/security-is-about-people.md, https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/awarding-process.md, https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/README.md\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "ORIGINAL: What are analysis reports?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "REPHRASED: Can you explain what analysis reports are?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "Analysis reports are written submissions that provide a comprehensive review of a codebase or a system. 
They outline the approach taken in reviewing the code, observations or advice about architecture, mechanism, or approach, and any broader concerns like systemic risks or centralization risks. These reports provide high-level insights and advice that aren't necessarily covered by specific bugs. They are judged based on their quality and the top Analysis is selected for inclusion in the audit report. In the context of Code4rena audits, each warden is encouraged to submit an Analysis alongside their findings for each audit. Advanced-level Analyses compete for a portion of each audit's award pool, and are graded and awarded similarly to QA and Gas Optimization reports.\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/README.md, https://github.com/code-423n4/docs/blob/main//awarding/judging-criteria/README.md\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "ORIGINAL: what is an analysis finding?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "REPHRASED: What does the term 'analysis finding' mean?" 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "An 'analysis finding' refers to a part of an audit report that outlines the auditors' analysis of the codebase as a whole, including any observations or advice they have about architecture, mechanism, or approach, broader concerns like systemic risks or centralization risks, the approach taken in reviewing the code, and new insights and learnings from the audit. It provides auditors with an opportunity to contribute value through high level insights and advice that aren't necessarily covered by specific bugs.\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://github.com/code-423n4/docs/blob/main//awarding/judging-criteria/README.md\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "ORIGINAL: My name wasn't in the award announcements. When can I check on my results?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "REPHRASED: When will the award announcements be available for me to check my results?" 
+ ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "The award announcements will be available for you to check your results approximately between 25 to 34 days after the audit submissions close.\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://github.com/code-423n4/docs/blob/main//structure/our-process/README.md\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "ORIGINAL: How long does the certification process take?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "REPHRASED: What is the duration of the certification process?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "The certification process typically takes one business day after the Certified Contributor Application form is submitted, provided all necessary documents are readily available. However, it may take longer if the necessary documents need to be assembled. 
The C4 audit process, on the other hand, takes longer than the ideal timeline, with the actual average ranging from Day 1 for audit submissions close to Day 42-60 for the audit report to be published and audit issues to be made public.\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://github.com/code-423n4/docs/blob/main//structure/our-process/README.md, https://github.com/code-423n4/docs/blob/main//roles/certified-contributors/README.md\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "ORIGINAL: How can I access findings.csv?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "REPHRASED: What is the process for accessing findings.csv?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "To access findings.csv, you can visit the link https://code4rena.com/community-resources/findings.csv. This file contains valid Code4rena findings. It is used in combination with contests.csv to reverse engineer awards. 
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/awarding-process.md\n" + ] + }, + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "ORIGINAL: Can I use chatgpt?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "REPHRASED: Is it possible for me to utilize chatgpt?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "The documents do not provide information on the possibility of utilizing chatgpt.\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://github.com/code-423n4/docs/blob/main//structure/frequently-asked-questions.md, https://code4rena.com/, https://code4rena.com/@gpersoon, https://code4rena.com/@0xA5DF\n" + ] + } + ], + "source": [ + "# Questions that were answered incorrectly by the Mava bot as per emoji reaction in the test channel\n", + "MAVA_MISANSWERED_QUES = [\n", + " \"what's a scout?\",\n", + " \"Am I allowed to use AI in an audit?\",\n", + " \"Can I change my Code4rena username?\",\n", + " \"How do I book a solo audit?\",\n", 
+ " \"Do I need to be certified to participate in an audit?\",\n", + " \"How do bot races work?\",\n", + " \"Can I change my Code4rena profile name?\",\n", + " \"What are scout awards?\",\n", + " \"What are analysis reports?\",\n", + " \"what is an analysis finding?\",\n", + " \"My name wasn't in the award announcements. When can I check on my results?\",\n", + " \"How long does the certification process take?\",\n", + " \"How can I access findings.csv?\",\n", + " \"Can I use chatgpt?\"\n", + "]\n", + "\n", + "for q in MAVA_MISANSWERED_QUES:\n", + " ask_with_sources(q, use_rephrased_questions=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [] } ], From 8afd766bcf3c79d3f923f89482773a27894043be Mon Sep 17 00:00:00 2001 From: Sagar Shah Date: Wed, 13 Sep 2023 23:59:51 -0500 Subject: [PATCH 07/11] wip --- qa_bot/qa_bot.ipynb | 1512 +++++++++---------------------------------- 1 file changed, 294 insertions(+), 1218 deletions(-) diff --git a/qa_bot/qa_bot.ipynb b/qa_bot/qa_bot.ipynb index 15a6a78..a4386b4 100644 --- a/qa_bot/qa_bot.ipynb +++ b/qa_bot/qa_bot.ipynb @@ -847,7 +847,7 @@ }, { "cell_type": "code", - "execution_count": 363, + "execution_count": 383, "metadata": {}, "outputs": [ { @@ -858,7 +858,7 @@ " 'Could you provide a description of scout awards?']" ] }, - "execution_count": 363, + "execution_count": 383, "metadata": {}, "output_type": "execute_result" } @@ -921,7 +921,7 @@ }, { "cell_type": "code", - "execution_count": 354, + "execution_count": 408, "metadata": {}, "outputs": [], "source": [ @@ -1091,7 +1091,7 @@ }, { "cell_type": "code", - "execution_count": 231, + "execution_count": 409, "metadata": {}, "outputs": [], "source": [ @@ -1105,18 +1105,18 @@ }, { "cell_type": "code", - "execution_count": 232, + "execution_count": 411, "metadata": {}, "outputs": [], "source": [ "predictions = [{'result': a} for a in bot_answers]\n", "\n", - "answer_grades = grade_model_retrieval(yaml_data, 
predictions)" + "answer_grades = grade_model_answer(yaml_data, predictions)" ] }, { "cell_type": "code", - "execution_count": 233, + "execution_count": 415, "metadata": {}, "outputs": [], "source": [ @@ -1126,14 +1126,21 @@ " for j, doc in enumerate(source_docs[i]):\n", " retrieved_doc_text += \"Doc %s: \" % str(j + 1) + doc.page_content + \" \"\n", " retrieved = {\"question\": d[\"question\"], \"answer\": d[\"answer\"], \"result\": retrieved_doc_text}\n", - " retrieved_docs.append(retrieved)\n", - "\n", - "retrieval_grades = grade_model_retrieval(yaml_data, predictions)" + " retrieved_docs.append(retrieved)" + ] + }, + { + "cell_type": "code", + "execution_count": 416, + "metadata": {}, + "outputs": [], + "source": [ + "retrieval_grades = grade_model_retrieval(yaml_data, retrieved_docs)" ] }, { "cell_type": "code", - "execution_count": 234, + "execution_count": 417, "metadata": {}, "outputs": [ { @@ -1171,13 +1178,13 @@ " To get backstage access, you need to become a ...\n", " To obtain +Backstage access, you need to meet ...\n", " Correct\n", - " Correct\n", + " Incorrect\n", " \n", " \n", " 1\n", " how long does it take until findings are relea...\n", " Based on the context provided, the findings fr...\n", - " The findings are released and made public when...\n", + " The audit report is published and audit issues...\n", " Correct\n", " Correct\n", " \n", @@ -1185,15 +1192,15 @@ " 2\n", " When can I talk about findings?\n", " You can talk about your findings after the con...\n", - " The appropriate time to discuss findings is af...\n", - " Incorrect\n", + " You can discuss the findings after the audit r...\n", " Incorrect\n", + " Correct\n", " \n", " \n", " 3\n", " How do I change my wallet address?\n", " To change your wallet address, follow these st...\n", - " To update your wallet address, follow these st...\n", + " To update your wallet address, you need to:\\n\\...\n", " Correct\n", " Correct\n", " \n", @@ -1201,7 +1208,7 @@ " 4\n", " What are scouts?\n", " 
In the context of Code4rena, Scouts are indivi...\n", - " Scouts are individuals who focus on scoping an...\n", + " Scouts in the context of Code4rena are individ...\n", " Correct\n", " Correct\n", " \n", @@ -1209,15 +1216,15 @@ " 5\n", " How long does the contest process usually take?\n", " Based on the provided context, the contest pro...\n", - " The typical duration of the contest process, f...\n", - " Correct\n", + " Most audits typically run for 3-7 days.\n", " Correct\n", + " Incorrect\n", " \n", " \n", " 6\n", " how does certification work?\n", " The certification process at Code4rena works i...\n", - " To obtain certification, you need to submit th...\n", + " The certification process is as follows:\\n\\n1....\n", " Correct\n", " Correct\n", " \n", @@ -1233,8 +1240,8 @@ " 8\n", " What is a lookout?\n", " In the context provided, a lookout is a role i...\n", - " A Lookout is a role in Code4rena's competition...\n", - " Correct\n", + " A Lookout in the context of Code4rena's compet...\n", + " Incorrect\n", " Correct\n", " \n", " \n", @@ -1266,28 +1273,28 @@ "\n", " Bot answers \\\n", "0 To obtain +Backstage access, you need to meet ... \n", - "1 The findings are released and made public when... \n", - "2 The appropriate time to discuss findings is af... \n", - "3 To update your wallet address, follow these st... \n", - "4 Scouts are individuals who focus on scoping an... \n", - "5 The typical duration of the contest process, f... \n", - "6 To obtain certification, you need to submit th... \n", + "1 The audit report is published and audit issues... \n", + "2 You can discuss the findings after the audit r... \n", + "3 To update your wallet address, you need to:\\n\\... \n", + "4 Scouts in the context of Code4rena are individ... \n", + "5 Most audits typically run for 3-7 days. \n", + "6 The certification process is as follows:\\n\\n1.... \n", "7 Yes, it is possible to utilize bots for code a... \n", - "8 A Lookout is a role in Code4rena's competition... 
\n", + "8 A Lookout in the context of Code4rena's compet... \n", "\n", " Retrieval relevancy score Answer similarity score \n", - "0 Correct Correct \n", + "0 Correct Incorrect \n", "1 Correct Correct \n", - "2 Incorrect Incorrect \n", + "2 Incorrect Correct \n", "3 Correct Correct \n", "4 Correct Correct \n", - "5 Correct Correct \n", + "5 Correct Incorrect \n", "6 Correct Correct \n", "7 Correct Correct \n", - "8 Correct Correct " + "8 Incorrect Correct " ] }, - "execution_count": 234, + "execution_count": 417, "metadata": {}, "output_type": "execute_result" } @@ -1305,23 +1312,6 @@ "df" ] }, - { - "cell_type": "code", - "execution_count": 235, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Bot Accuracy: 0.8888888888888888\n" - ] - } - ], - "source": [ - "print(f\"Bot Accuracy: {df['Answer similarity score'].value_counts()['Correct'] / len(df['Answer similarity score'])}\")" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -1403,6 +1393,33 @@ " display_result(question, result)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### MultiQuery approach" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# from langchain.chat_models import ChatOpenAI\n", + "# from langchain.retrievers.multi_query import MultiQueryRetriever\n", + "\n", + "# question = \"What are scout awards?\"\n", + "# llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", + "# multiquery_retriever = MultiQueryRetriever.from_llm(\n", + "# retriever=vectorstore.as_retriever(), llm=llm\n", + "# )\n", + "# import logging\n", + "\n", + "# logging.basicConfig()\n", + "# logging.getLogger(\"langchain.retrievers.multi_query\").setLevel(logging.INFO)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1437,53 +1454,14 @@ }, { "cell_type": "code", - "execution_count": 372, + "execution_count": 375, "metadata": {}, - "outputs": [ - { - "name": 
"stderr", - "output_type": "stream", - "text": [ - "INFO:langchain.retrievers.multi_query:Generated queries: ['1. Can you provide information about the different types of scout awards?', '2. How can I learn more about the various scout awards available?', '3. What is the significance of scout awards and how can one earn them?']\n" - ] - }, - { - "data": { - "text/plain": [ - "[Document(page_content=\"### How can I become a Judge?\\n\\nComplete [this form](https://code4rena.com/judge-application/) and share: Short bio/intro and summary of relevant experience, links that help demonstrate your expertise, 3 example submissions to Code4rena contests that were judged high severity, description of how each submission demonstrates your depth of knowledge.\\n\\n### How can I become a Scout?\\n\\nYou can’t, just yet! Right now, Scouts are hand-picked by the C4 team as it’s a highly sensitive role. We’re looking at the possibility of opening up this process, but not in the near future.\\n\\n### I want Code4rena to audit my project, where do I start?\\n\\nIt’s really simple! Just visit [this link](https://code4rena.typeform.com/i-want-an-audit) and fill out the form. Our team will be in touch with you shortly after you’ve completed it.\\n\\n### Do you have a blog?\\n\\nWe do indeed, [here](https://medium.com/code-423n4). We post product updates, sponsor interviews and more.\\n\\n### What’s the best way to stay up to date with Code4rena?\\n\\n[Follow us](https://twitter.com/code4rena) on Twitter and turn on notifications in our #announcements channel in [Discord](https://discord.gg/code4rena). \\n\\n### Where can I view the audit results?\\n\\nOnce an audit's results have been finalized, they’ll be shared in our #announcements channel in Discord. 
The audit's page in the ‘[Audits](https://code4rena.com/contests)’ section on our website will also be updated to show results.\\n\\n### Where can I read past Code4rena audit reports?\\n\\nWe push all public audit reports to the ‘[Reports](https://code4rena.com/reports)’ section on our website. This enables you to read through past findings, evaluate commonalities, and continue your learning journey. Our [GitHub](https://github.com/code-423n4/) repos are also public, for those of you interested in diving deeper.\", metadata={'source': 'g68-pl', 'url': 'https://github.com/code-423n4/docs/blob/main//structure/frequently-asked-questions.md'}),\n", - " Document(page_content='#### Can I see some examples of how awards work?\\n\\nAwards for each contest are [posted on the Code4rena website](https://code4rena.com/contests). See [Numoen](https://code4rena.com/contests/2023-01-numoen-findings), for example. The award calculation for Numoen had the following parameters:\\n\\n* **Total awards: 50,000 USDC**\\n* Main award pool: 42,500 USDC\\n* QA pool: 5,000 USDC\\n* Gas pool: 2,500 USDC\\n\\nThe table below shows each unique high and medium severity finding (`H-XX`, `M-XX`), QA report (`Q-XX`), gas optimization report (`G-XX`), and the way each submission’s award was calculated:\\n\\n* `pie` is the number of shares assigned to that report or finding\\n* `split` is the number of times those shares were divided\\n* `slice` is the number of shares assigned for that warden’s finding\\n* each `award` is calculated by `shares * (pot / number_of_shares)`\\n\\n**Tribe Turbo awards**', metadata={'source': 'g22-pl', 'url': 'https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/curve-logic.md'}),\n", - " Document(page_content='# Incentive model and awards\\n\\nTo incentivize **wardens**, C4 uses a unique scoring system with two primary goals: reward contestants for finding unique bugs and also to make the audit resistant to Sybil attack. 
A secondary goal of the scoring system is to encourage contestants to form teams and collaborate.\\n\\n**Judges** are incentivized to review findings and decide their severity, validity, and quality by receiving a share of the prize pool themselves.\\n\\n## High and Medium Risk bugs\\n\\nContestants are given shares for bugs discovered based on severity, and those shares give the owner a pro rata piece of the pot:\\n\\n`Med Risk Shares: 3 * (0.9 ^ (findingCount - 1)) / findingCount`\\\\\\n`High Risk Shares: 10 * (0.9 ^ (findingCount - 1)) / findingCount`\\n\\nFindingCount represents the number of findings for a same specific bug.\\nPlease note that findings with partial credit as still count as 1 finding in the algorithm\\n\\nDuring awarding, each share is redeemed for: `pot / number of shares`.\\n\\n### Bonus for best / selected for report\\n\\nFor each unique High or Medium finding, the submission selected for inclusion in the audit report receives a 30% share bonus.\\n\\n### Duplicates getting partial credit\\n\\nAll issues which identify the same functional vulnerability will be considered duplicates regardless of effective rationalization of severity or exploit path.\\n\\nHowever, any submissions which do not identify or effectively rationalize the top identified severity case may be judged as “partial credit” and may have their shares in that finding’s pie divided by 2 or 4 at judge’s sole discretion (e.g. 50% or 25% of the shares of a satisfactory submission in the duplicate set).', metadata={'source': 'g17-pl', 'url': 'https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/README.md'}),\n", - " Document(page_content='---\\ndescription: >-\\n This is a high level overview of the C4 awarding process. \\n---\\n\\n# Awarding process\\n\\nAt the conclusion of an audit, sponsors review wardens’ findings and express their opinions with regard to severity of issues. 
Judges evaluate input from both and make the ultimate decision in terms of severity and validity of issues. (See [How to judge an audit](../../roles/judges/how-to-judge-a-contest.md) for more detail.)\\n\\nIn making their determination, judges add labels to Github issues, while the original submission data (including the warden\\'s proposed severity rating) is preserved via a JSON data file. \\n\\nThe judge\\'s decisions are reviewed by the sponsoring project team and by [+backstage wardens](https://docs.code4rena.com/roles/certified-contributors/backstage-wardens) via a 48-hour QA process, to ensure fairness and quality. \\n\\nJudging data is used to generate the awards using Code4rena\\'s award calculation script, which factors in:\\n\\n- Risk level\\n- Validity\\n- Number of duplicates\\n- Grade (A, B, C; Satisfactory/Unsatisfactory)\\n- In some cases, \"partial duplicate\" status\\n\\n It should be possible to reverse engineer awards using a combination of two CSV files:\\n \\n - [`findings.csv`](https://code4rena.com/community-resources/findings.csv): valid Code4rena findings\\n - [`contests.csv`](https://code4rena.com/community-resources/contests.csv): Code4rena audits \\n\\nOnce awards are determined, we generate a CSV file enumerating funds to be sent. Distribution is then initiated using disperse.app and sent to multisig signers for completion of payment.', metadata={'source': 'g13-pl', 'url': 'https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/awarding-process.md'}),\n", - " Document(page_content='### Bug bounties vs C4 audits\\n\\n| Bug bounties | Competitive audits |\\n| ------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |\\n| **Spec work.** No way to have confidence that the time invested will produce a payout. 
| **Guaranteed payouts.** Auditors know it’s highly likely they can find a bug that will make it worth their time. |\\n| **Dark forest.** Who knows how much competition there is right now? Or how mature the codebase is? | **Low-hanging fruit.** If a project is seeking an audit, it’s likely fresh code with clear opportunities to dig in. |\\n| **Grow on your own.** Researchers have to proactively look for ways to learn and level up their skills. | **Learning community.** Open, competitive audits let auditors compare everyone’s findings and learn new things every single week. |\\n| **Paradox of choice.** So many projects have bounties. How does an auditor choose which to focus on? | **Less FOMO.** C4 runs a handful of active audits at a time and wardens can RSVP to signal to each other which audits have more participants. |', metadata={'source': 'g2-pl', 'url': 'https://github.com/code-423n4/docs/blob/main//README.md'}),\n", - " Document(page_content='### Getting an audit is courageous\\n\\nAn audit is an investment in a better long-term outcome for both the project undergoing the audit as well as its users and the community as a whole. \\n\\nIt requires tremendous courage to ask someone to look closely and find as many places where code could be improved. \\n\\nBecause of this, we treat sponsor projects with respect. We consider every finding discovered in an audit contest as a tool that we can use in the future to help others learn and better understand smart contract vulnerabilities. \\n\\n### Both wardens and sponsors have a voice in the process\\n\\nWe take sponsors’ feelings into consideration, working to help find ways to communicate and disclose the results of audits in a way that works as best as we can to eliminate the “blame and shame” approach and replace it with one that honors sponsors’ contribution toward making their project and the DeFi space more secure for everyone.\\n\\nOur contest process is transparent, with all issues reviewable on GitHub. 
We put an impartial judge in the position of listening to all players’ positions on a given issue and making a final determination.\\n\\n### We are a community\\n\\nWe trust that a community-driven approach with valuable incentives ensures enough coverage to give sponsors a meaningful and valuable audit without putting the burden on any one person to do a ’good enough job’ catching every bug—a burden that burns out traditional auditors.\\n\\nOur community chose the wolf as our symbol, and it’s fitting. Wolves are highly collaborative creatures who hunt in packs.\\n\\nRather than putting the pressure on individual auditors to catch all the vulnerabilities in a project, we ask competitors to catch the bugs that they can, and trust that the community will show up and contribute.', metadata={'source': 'g63-pl', 'url': 'https://github.com/code-423n4/docs/blob/main//philosophy/security-is-about-people.md'})]" - ] - }, - "execution_count": 372, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from langchain.chat_models import ChatOpenAI\n", - "from langchain.retrievers.multi_query import MultiQueryRetriever\n", - "\n", - "question = \"What are scout awards?\"\n", - "llm = ChatOpenAI(temperature=0)\n", - "retriever_from_llm = MultiQueryRetriever.from_llm(\n", - " retriever=vectorstore_with_sources.as_retriever(), llm=llm\n", - ")\n", - "import logging\n", - "\n", - "logging.basicConfig()\n", - "logging.getLogger(\"langchain.retrievers.multi_query\").setLevel(logging.INFO)\n", - "\n", - "unique_docs = retriever_from_llm.get_relevant_documents(query=question)\n", - "unique_docs" - ] + "outputs": [], + "source": [] }, { "cell_type": "code", - "execution_count": 357, + "execution_count": 422, "metadata": {}, "outputs": [], "source": [ @@ -1493,7 +1471,7 @@ "\n", "model = ChatOpenAI(model_name=\"gpt-4\", temperature=0)\n", "\n", - "qa = RetrievalQAWithSourcesChain.from_chain_type(model, chain_type=\"stuff\", 
retriever=vectorstore_with_sources.as_retriever(), return_source_documents=True)\n", + "qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(model, chain_type=\"stuff\", retriever=vectorstore_with_sources.as_retriever(), return_source_documents=True)\n", "\n", "\n", "def run_qa_with_sources(question, use_rephrased_questions=False):\n", @@ -1501,14 +1479,14 @@ " rephrased_question = None\n", "\n", " if not use_rephrased_questions:\n", - " result = qa({\"question\": question}, return_only_outputs=True)\n", + " result = qa_with_sources({\"question\": question}, return_only_outputs=True)\n", " else:\n", " \n", " rephrased_questions = generate_rephrased_questions(question)\n", "\n", " # Attempt each question until a valid result is found\n", " for q in rephrased_questions:\n", - " result = qa({\"question\": q}, return_only_outputs=True) \n", + " result = qa_with_sources({\"question\": q}, return_only_outputs=True) \n", " # If the model is unable to find an answer, it returns 'sorry' in the response, we try again with a different question\n", " if 'sorry' in result['answer'].lower():\n", " continue\n", @@ -1527,7 +1505,7 @@ " url = metadata['url']\n", " if source_id in source_ids:\n", " source_urls.add(url)\n", - " return dict(answer=answer, source_urls=source_urls, rephrased_question=rephrased_question)\n", + " return dict(answer=answer, source_urls=source_urls, rephrased_question=rephrased_question, source_docs=source_docs)\n", "\n", "def ask_with_sources(question, use_rephrased_questions=False):\n", " result = run_qa_with_sources(question, use_rephrased_questions)\n", @@ -1545,1144 +1523,242 @@ }, { "cell_type": "code", - "execution_count": 360, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Questions that were answered incorrectly by the Mava bot as per emoji reaction in the test channel\n", + "MAVA_MISANSWERED_QUES = [\n", + " \"what's a scout?\",\n", + " \"Am I allowed to use AI in an audit?\",\n", + " \"Can I change my 
Code4rena username?\",\n", + " \"How do I book a solo audit?\",\n", + " \"Do I need to be certified to participate in an audit?\",\n", + " \"How do bot races work?\",\n", + " \"Can I change my Code4rena profile name?\",\n", + " \"What are scout awards?\",\n", + " \"What are analysis reports?\",\n", + " \"what is an analysis finding?\",\n", + " \"My name wasn't in the award announcements. When can I check on my results?\",\n", + " \"How long does the certification process take?\",\n", + " \"How can I access findings.csv?\",\n", + " \"Can I use chatgpt?\"\n", + "]\n", + "\n", + "for q in MAVA_MISANSWERED_QUES:\n", + " ask_with_sources(q, use_rephrased_questions=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 425, + "metadata": {}, + "outputs": [], + "source": [ + "def auto_eval():\n", + " bot_answers = []\n", + " source_docs = []\n", + " for d in yaml_data:\n", + " result = run_qa_with_sources(d['question'])\n", + " bot_answers.append(result['answer'])\n", + " source_docs.append(result['source_docs'])\n", + " \n", + " predictions = [{'result': a} for a in bot_answers]\n", + "\n", + " answer_grades = grade_model_answer(yaml_data, predictions)\n", + "\n", + " retrieved_docs = []\n", + " for i, d in enumerate(yaml_data):\n", + " retrieved_doc_text = \"\"\n", + " for j, doc in enumerate(source_docs[i]):\n", + " retrieved_doc_text += \"Doc %s: \" % str(j + 1) + doc.page_content + \" \"\n", + " retrieved = {\"question\": d[\"question\"], \"answer\": d[\"answer\"], \"result\": retrieved_doc_text}\n", + " retrieved_docs.append(retrieved)\n", + "\n", + " retrieval_grades = grade_model_retrieval(yaml_data, retrieved_docs)\n", + "\n", + " df = pd.DataFrame({\n", + " \"question\": [d['question'] for d in yaml_data],\n", + " \"Mava correct answer (True value)\": [d['answer'] for d in yaml_data],\n", + " \"Bot answers\": [p['result'] for p in predictions],\n", + " \"Retrieval relevancy score\": ['Incorrect' if 'Incorrect' in g['results'] else 'Correct' for g in 
retrieval_grades],\n", + " \"Answer similarity score\": ['Incorrect' if 'Incorrect' in g['results'] else 'Correct' for g in answer_grades]\n", + " })\n", + " print(f\"Bot Accuracy: {df['Answer similarity score'].value_counts()['Correct'] / len(df['Answer similarity score'])}\")\n", + " \n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 426, "metadata": {}, "outputs": [ - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "ORIGINAL: what's a scout?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "REPHRASED: What is the definition of a scout?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "A scout in the context of Code4rena is a highly sensitive role that is currently hand-picked by the C4 team. The process of becoming a scout is not open to the public at this time.\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "name": "stdout", "output_type": "stream", "text": [ - "https://github.com/code-423n4/docs/blob/main//structure/frequently-asked-questions.md\n" + "Bot Accuracy: 0.7777777777777778\n" ] }, { "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "ORIGINAL: Am I allowed to use AI in an audit?" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
questionMava correct answer (True value)Bot answersRetrieval relevancy scoreAnswer similarity score
0Hi, how can I get backstage access?To get backstage access, you need to become a ...The documents provided do not contain informat...IncorrectIncorrect
1how long does it take until findings are relea...Based on the context provided, the findings fr...The findings from the audit are typically rele...CorrectCorrect
2When can I talk about findings?You can talk about your findings after the con...You can talk about findings after they have be...IncorrectIncorrect
3How do I change my wallet address?To change your wallet address, follow these st...You can change your payment information at any...CorrectCorrect
4What are scouts?In the context of Code4rena, Scouts are indivi...In the context of Code4rena, scouts are indivi...CorrectCorrect
5How long does the contest process usually take?Based on the provided context, the contest pro...The contest process usually takes between 42 t...CorrectCorrect
6how does certification work?The certification process at Code4rena works i...Certification works through a process where an...CorrectCorrect
7Can I use bots to analyze code?Yes, you can use bots to analyze code. In fact...Yes, you can use bots to analyze code. Code4re...CorrectCorrect
8What is a lookout?In the context provided, a lookout is a role i...In the context of Code4rena, a lookout is a ro...IncorrectCorrect
\n", + "
" ], "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "REPHRASED: Can I utilize AI technology during an audit?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "Yes, you can utilize AI technology during an audit. However, using the output of AI tools like ChatGPT, GPT-3, or other automated tools for audit submissions is discouraged as it can lead to a high ratio of nonsense submissions. Automated tools can be used as a first pass, and findings from these tools can be built upon to identify high and medium severity issues. Code4rena also runs a Bot Race at the start of each audit, where AI-driven bots compete to create the highest quality and most thorough audit report. The winning report is shared with all wardens and all findings in the winning Bot Report are declared publicly known issues, and therefore ineligible for awards.\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://github.com/code-423n4/docs/blob/main//roles/wardens/submission-policy.md, https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/README.md\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "ORIGINAL: Can I change my Code4rena username?" 
- ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "REPHRASED: Is it possible to modify my Code4rena username?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "The documents do not provide information on whether it is possible to modify a Code4rena username.\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://github.com/code-423n4/docs/blob/main//roles/wardens/warden-auth.md, https://code4rena.com/help\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "ORIGINAL: How do I book a solo audit?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "REPHRASED: What is the process for reserving a solo audit?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "The process for reserving a solo audit involves the following steps:\n", - "\n", - "1. A project team member clicks the “Get a quote” button on a warden’s profile, and shares scoping details with the Code4rena team.\n", - "2. 
Code4rena staff then consult with the warden and project team to firm up scoping, pricing, and dates.\n", - "\n", - "When a solo audit is booked via a warden’s “Available for hire” function, both the warden and the project have direct access to Code4rena staff for assistance with legal agreements, scoping, pricing, and logistical planning, collection and disbursement of funds, and external accountability via a Code4rena Judge.\n", - "\n", - "If you are a Certified warden, you can make yourself available for solo audits by logging in to your user account on the Code4rena website, and opting in to the “Available for hire” function on the Settings screen.\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://github.com/code-423n4/docs/blob/main//roles/wardens/solo-audits.md\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "ORIGINAL: Do I need to be certified to participate in an audit?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "REPHRASED: Is certification required to take part in an audit?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "Yes, certification is required to take part in an audit. 
Certified Wardens have the opportunity to book solo audits through Code4rena.\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://github.com/code-423n4/docs/blob/main//roles/certified-contributors/README.md, https://github.com/code-423n4/docs/blob/main//roles/wardens/solo-audits.md\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "ORIGINAL: How do bot races work?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "REPHRASED: What is the process of bot races?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "The process of bot races involves two stages. In the first stage, known as the Qualifier Race, Bot Crews compete to have their bots deliver the highest quality and most thorough report based on a repo provided at start time. This stage is open for one hour, and judges choose the Top 20 Bots for upcoming competitions. In the second stage, known as the Bot Race, the first hour of each audit competition is dedicated to a Bot Race. Bot Race submissions are assessed by Lookouts, and graded A/B/C similarly to other reports based on report quality, validity, and thoroughness. The best Bot Race report will receive a 30% share bonus. The top report will be shared in the competition's channel and will be considered the official source for known issue submissions. 
The winning bot report is selected and shared with all wardens within 24 hours of the audit start time. The full set of issues identified by the best automated tools are considered out of scope for the audit and ineligible for awards.\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://code4rena.com/how-it-works, https://code4rena.com/register/bot, https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/README.md\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "ORIGINAL: Can I change my Code4rena profile name?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "REPHRASED: Is it possible to modify my Code4rena profile name?" 
- ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "The documents do not provide information on whether it is possible to modify a Code4rena profile name.\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://code4rena.com/register, https://code4rena.com/contests/2023-05-chainlink-cross-chain-services-ccip-and-arm-network, https://code4rena.com/contests/2023-01-reserve-contest, https://code4rena.com/help\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "ORIGINAL: What are scout awards?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "REPHRASED: What is the meaning of scout awards?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "Scout awards in the context of the Code4rena (C4) community are incentives given to wardens (participants) who find unique bugs in a project undergoing an audit. The awarding process involves judges who evaluate the severity and validity of the issues found. 
The awards are calculated based on several factors including risk level, validity, number of duplicates, grade (A, B, C; Satisfactory/Unsatisfactory), and in some cases, \"partial duplicate\" status. For each unique High or Medium finding, the submission selected for inclusion in the audit report receives a 30% share bonus. All issues which identify the same functional vulnerability are considered duplicates, but may receive partial credit. The awards are then distributed to the wardens.\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/curve-logic.md, https://github.com/code-423n4/docs/blob/main//philosophy/security-is-about-people.md, https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/awarding-process.md, https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/README.md\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "ORIGINAL: What are analysis reports?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "REPHRASED: Can you explain what analysis reports are?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "Analysis reports are written submissions that provide a comprehensive review of a codebase or a system. 
They outline the approach taken in reviewing the code, observations or advice about architecture, mechanism, or approach, and any broader concerns like systemic risks or centralization risks. These reports provide high-level insights and advice that aren't necessarily covered by specific bugs. They are judged based on their quality and the top Analysis is selected for inclusion in the audit report. In the context of Code4rena audits, each warden is encouraged to submit an Analysis alongside their findings for each audit. Advanced-level Analyses compete for a portion of each audit's award pool, and are graded and awarded similarly to QA and Gas Optimization reports.\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/README.md, https://github.com/code-423n4/docs/blob/main//awarding/judging-criteria/README.md\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "ORIGINAL: what is an analysis finding?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "REPHRASED: What does the term 'analysis finding' mean?" 
- ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "An 'analysis finding' refers to a part of an audit report that outlines the auditors' analysis of the codebase as a whole, including any observations or advice they have about architecture, mechanism, or approach, broader concerns like systemic risks or centralization risks, the approach taken in reviewing the code, and new insights and learnings from the audit. It provides auditors with an opportunity to contribute value through high level insights and advice that aren't necessarily covered by specific bugs.\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://github.com/code-423n4/docs/blob/main//awarding/judging-criteria/README.md\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "ORIGINAL: My name wasn't in the award announcements. When can I check on my results?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "REPHRASED: When will the award announcements be available for me to check my results?" 
- ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "The award announcements will be available for you to check your results approximately between 25 to 34 days after the audit submissions close.\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://github.com/code-423n4/docs/blob/main//structure/our-process/README.md\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "ORIGINAL: How long does the certification process take?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "REPHRASED: What is the duration of the certification process?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "The certification process typically takes one business day after the Certified Contributor Application form is submitted, provided all necessary documents are readily available. However, it may take longer if the necessary documents need to be assembled. 
The C4 audit process, on the other hand, takes longer than the ideal timeline, with the actual average ranging from Day 1 for audit submissions close to Day 42-60 for the audit report to be published and audit issues to be made public.\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://github.com/code-423n4/docs/blob/main//structure/our-process/README.md, https://github.com/code-423n4/docs/blob/main//roles/certified-contributors/README.md\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "ORIGINAL: How can I access findings.csv?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "REPHRASED: What is the process for accessing findings.csv?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "To access findings.csv, you can visit the link https://code4rena.com/community-resources/findings.csv. This file contains valid Code4rena findings. It is used in combination with contests.csv to reverse engineer awards. 
\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://github.com/code-423n4/docs/blob/main//awarding/incentive-model-and-awards/awarding-process.md\n" - ] - }, - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "ORIGINAL: Can I use chatgpt?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "REPHRASED: Is it possible for me to utilize chatgpt?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "The documents do not provide information on the possibility of utilizing chatgpt.\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" + " question \\\n", + "0 Hi, how can I get backstage access? \n", + "1 how long does it take until findings are relea... \n", + "2 When can I talk about findings? \n", + "3 How do I change my wallet address? \n", + "4 What are scouts? \n", + "5 How long does the contest process usually take? \n", + "6 how does certification work? \n", + "7 Can I use bots to analyze code? \n", + "8 What is a lookout? \n", + "\n", + " Mava correct answer (True value) \\\n", + "0 To get backstage access, you need to become a ... \n", + "1 Based on the context provided, the findings fr... \n", + "2 You can talk about your findings after the con... 
\n", + "3 To change your wallet address, follow these st... \n", + "4 In the context of Code4rena, Scouts are indivi... \n", + "5 Based on the provided context, the contest pro... \n", + "6 The certification process at Code4rena works i... \n", + "7 Yes, you can use bots to analyze code. In fact... \n", + "8 In the context provided, a lookout is a role i... \n", + "\n", + " Bot answers \\\n", + "0 The documents provided do not contain informat... \n", + "1 The findings from the audit are typically rele... \n", + "2 You can talk about findings after they have be... \n", + "3 You can change your payment information at any... \n", + "4 In the context of Code4rena, scouts are indivi... \n", + "5 The contest process usually takes between 42 t... \n", + "6 Certification works through a process where an... \n", + "7 Yes, you can use bots to analyze code. Code4re... \n", + "8 In the context of Code4rena, a lookout is a ro... \n", + "\n", + " Retrieval relevancy score Answer similarity score \n", + "0 Incorrect Incorrect \n", + "1 Correct Correct \n", + "2 Incorrect Incorrect \n", + "3 Correct Correct \n", + "4 Correct Correct \n", + "5 Correct Correct \n", + "6 Correct Correct \n", + "7 Correct Correct \n", + "8 Incorrect Correct " ] }, + "execution_count": 426, "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://github.com/code-423n4/docs/blob/main//structure/frequently-asked-questions.md, https://code4rena.com/, https://code4rena.com/@gpersoon, https://code4rena.com/@0xA5DF\n" - ] + "output_type": "execute_result" } ], "source": [ - "# Questions that were answered incorrectly by the Mava bot as per emoji reaction in the test channel\n", - "MAVA_MISANSWERED_QUES = [\n", - " \"what's a scout?\",\n", - " \"Am I allowed to use AI in an audit?\",\n", - " \"Can I change my Code4rena username?\",\n", - " \"How do I book a solo audit?\",\n", - " \"Do I need to be certified to participate in an 
audit?\",\n", - " \"How do bot races work?\",\n", - " \"Can I change my Code4rena profile name?\",\n", - " \"What are scout awards?\",\n", - " \"What are analysis reports?\",\n", - " \"what is an analysis finding?\",\n", - " \"My name wasn't in the award announcements. When can I check on my results?\",\n", - " \"How long does the certification process take?\",\n", - " \"How can I access findings.csv?\",\n", - " \"Can I use chatgpt?\"\n", - "]\n", - "\n", - "for q in MAVA_MISANSWERED_QUES:\n", - " ask_with_sources(q, use_rephrased_questions=True)" + "auto_eval()" ] }, { From c39146770261006f6e3592e29cddd33c734f6316 Mon Sep 17 00:00:00 2001 From: Sagar Shah Date: Thu, 14 Sep 2023 19:21:37 -0500 Subject: [PATCH 08/11] wip --- .gitignore | 4 +- ...t_qa.yaml => c4_mava_correct_ans_set.yaml} | 0 qa_bot/outputs/.gitkeep | 0 qa_bot/outputs/eval_results.xlsx | Bin 0 -> 11577 bytes qa_bot/qa_bot.ipynb | 838 ++++++++++++------ 5 files changed, 583 insertions(+), 259 deletions(-) rename qa_bot/knowledge_base/c4/{c4_test_qa.yaml => c4_mava_correct_ans_set.yaml} (100%) create mode 100644 qa_bot/outputs/.gitkeep create mode 100644 qa_bot/outputs/eval_results.xlsx diff --git a/.gitignore b/.gitignore index 0a7a692..6c4d9ee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ .idea .env -.ipynb_checkpoints \ No newline at end of file +.ipynb_checkpoints +.DS_Store +outputs/*.xlsx \ No newline at end of file diff --git a/qa_bot/knowledge_base/c4/c4_test_qa.yaml b/qa_bot/knowledge_base/c4/c4_mava_correct_ans_set.yaml similarity index 100% rename from qa_bot/knowledge_base/c4/c4_test_qa.yaml rename to qa_bot/knowledge_base/c4/c4_mava_correct_ans_set.yaml diff --git a/qa_bot/outputs/.gitkeep b/qa_bot/outputs/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/qa_bot/outputs/eval_results.xlsx b/qa_bot/outputs/eval_results.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..1cefc6c58db793941b33f78ce87dc4144767175b GIT binary patch literal 11577 
zcmaKS19WBEw(U+*v2CMb+qPM8#kOtKj#;tIif!9QMHSnqBtPfg``&q{{`-How%2TX zwKjTNyRR|(oO6t>Cm~Ag3lGwv5aCnWH$yrGaJwS|b45RcGOvg)@7H{nu4(+Wo;ZjjsDJM{_ z!uWjMX(*Z(iw^fY9Z@cPC)#V?XXZ<8tdrtG_iWw+Hxd!$O-fAOL_=^&@xJmNV{{M} z+xsuTG|EkxZ>^!?2FN7tRjR~nz(+PY6IIWmA|gA_4(-yQ&zFfRC&Zya5+(ENO8519 zQzt~wc)(KwI%~~BMJDcYbO&Af_9w9sj^IEZ9{5G}9X8nDbs2LSKQMNm>UNe9oZ&(0 znFz}-OF>ipQd>bO9E!&%%T_nyx7HCoM3o+xa&876Kx zzRe2+o&Ju&_ozfk;gI%**ZIBI>RBO?d+`n%E|vC$g+U?POwVIs%O1I8Z|oogC_xVtKL&hTqLDW{N1hp{W&-06kLjM=9ar#j@2T z%U-J_NtnCxR?2QfqA?yq8W3sbAe*8<$SJurNh!U>rnA(wgc#ScF%Ybughj3UbYoZNq zj0-z_h@asW)M4@ASfg%{Cv)1@esqbwFlDui@%nh@nDn3g6h!sO$AbX?KacL}h zJn~5yxsBaI>^GInSvO!+`f1!ZkPk^|@W1_#XgfM>L?n_6)xSSnpCjVG{eGRx2J3k- zq~>E4U%8PqHFVzQ^=erDntNiWth0VTTBSpW^N_~6JF(`(6%6jQz~IcH(~eygt@V?O z_T40jq4xWx<4u<#zPi7@1Frg3A<4-GgN&d9v?LBXOEC`4endV4-3-ghmv4hWS7xKK z?O^Qk?&e_=BRU+b)se{3P!-b(5u?~B*qLUcOxg*y1^dCsh-uF2$S8Ab$$Q9%bGQlIm*F;j=pByR5#rc zn@RlLAYR38izv5j7LUZ^(3a#JIng6h_*`C1&E5@a<1?~bs-d7)fYZ1NIwW>58BGzg z8@7p{2RNQ-RvG7nV7F#)KG6*vWxYS@gbKO@POvOXToeTJynj3_=OV9~LzNH|J1Q~t zurU~rVaxhPbz&QWctiG6IngdWWuHITq>9U>8INejw;$-ngCOA8_M;wU`zb-OP87Xd zu|kk&rdW}5stb57NVSuqU&5GAVrSvFPI%Rv77d2EQ4R1%D@5#8cPc($HN%KcWP@F> zQkUCnXj4|&ztLi?3NNNYohX{(d)8r_Wfogba>HqwkyMDJx*WorXSigQah+U(-Drzb zJoNpnvx!!FosZvbx&RtH+U%>nhfMQY8>BpX1dxDJqMIw{6a)sMnU{9Q1`1QB#DbEd z%S!c#Qm4cPmZ~z2!+!^+egR=E)1keZMce(9_BlKNR1U3QGQbqZutZP=oJl>WB@l~t z+E^$O!@Q9wG=fzJX-+JS9(Y6|hdJOpkP3!-ak#GZV1In6lPRDehr6Tnq}|3l}3uq??XKUQ9f z51m8&dmjIn!bK^KeW*WD`wjZyrUii|sTNylzl^G|zo^9&#U`sdK2I(F>>J;SDwX|i zJydT8 z+gz^=&+=kT46UVhFtqKPh0>giiFF^JQd^9URUE7a>9e?)1)iBwUtwrnU+>#cpK22A z)n=k@&}z0EE%mvDm&VwlP)$Wa?->`;uFXp?uGUM5Kh^o~pjZMi1eLs^A%VcN)iwDt zLDMU^yd9M53FZLwi^QMPmiauHG?_&f7~CRD>V_A-p`a#1ne)B3Tm>qvrxwN|a9;$r zGSNCIh$9X#&!B&G-gAxD(IV{6Fg$Gg8ku-PJrLLnzoQ+e-G8>mjQ7`#Ft*1UlCF}L zMjf3_kvAK``?5ggBC%TT%kIqxC1Mo6gZoeMoUASOIYt2hj)eh$zcxW1i;I)Ffuo7B zinF7It(ntb!nvSr6N4+^_}1B#S0l9b2`m0&VaISd-uq-Blbi_XlMG7wvx;ku?7-DD zjblRu_*Re|2bj#BV;E6U+u&m%bCc^3hs0W>Utw6dyPj8B=!og`($%mu($ZMnzj|7l 
zi*PbYTgNb|^Y(tru72Zv+SlQ#P8Zf_WQv(`8_e{3*gK7L(lfB!CCILB*B8i`(CK7u za8+qzhws!}Zd@ejY8*?|ZW70|HHoY)U->mKKxo=PpYD{jR&^CK^?r7(<0AL;Fu5L# zNI#j)>-Z}{;tiVLnV^?1?dfxj>20d_TgRO=hb`>PLzkyT8#@Oip#Tj_L#=kwi8!Bz z%@rL#2EcJQKiCWRO#B3h2pP2vqj%&Vz8UqkfO zi=cTtbvLN_nlcrf5JLs}LPG|&ybE!M)h_1MHPDS0#AXOkuf8YU(*_1Hzh0rgkUk~_ z%;F+umbq9RO_w?0H!feTL|gis?HAqs${T0H7O}DM@?yLXjalK!-(NIhfCW3Ue4w$> z#G{{x|2|E)SJ#B}_O%~_v@;LHyD_M`+R7So=4=tI9{0@qRVNAlUagg8w2z9!uzDRA z%5@=4d~UCC<}Np7gv?1&s&;vFv*uX^XAvvT)HZK&@1bFk!J}nybDhR;8IfAGGBn_P z?ye@NyCR~N`S{Raa5L{#y=j1tf8gHfhmzG|Bj1khnt5LlA#TRY*V736u}@*H6B=30 zf}6u6Umb&nr{rvKzJ}t6!Yh(@so8Vkp@2-Q6+b-=Yv&ge-d&nOqkQ_LY%lNB)G{=b z>(!-0xE!N+m8;$77&8Upby*h0<##pB@?Tgy(j?U=`b&CA)!FV$DpOHKm0vQWd3mv)lw>*Jnor)9#8 zCNspMXg%8#p&!hXfgJ}Q&kOQo|EQca zn_3I)fr%Kicmwcl1P zA-ubulydV-2o0YOF~ivR+rd}3x=6+U-WEpp3?qic69f&fpt$B4-SIyj_7 zqvcQwS~gNnVi*l>w01n-)4_8Eaivt?Qc?~q*VAZmiyMRyB83H7XEEF)R z+OlP2?b4Tpc~7GB(3q#R9C5#r?3*N19%{;|o$$aZDmghLNUFE2u0j4gX^He(FSDps zhGnevb(L76XUVFI6;QAzr7}Rlib4RXgTya#y5o=+577C8`G=?!atx!FT+vV`;;s@D ztv*(@zPa1lS_yT!wi?*v7+bs=!$ek*zsDso-W&i3th_FYk2vT^2uI zGnKIO9xa28%Oti{DF^nu^2` z!JB-ygCtZ?9Imd|kCSgB18u94W}~`OyMDnbT5gPc<^#MsfS_v#dk0%{kb+V|gHS7ZntQSLyGvPd()Kh(@oX>yvvk%Mf>Q3Q5}DG{m(md8YPe#5 zMHb-9(oDL@%N1ppLJ1(|IKJ}cq&C?z(XUyPOP^#QQ2=jK$|c48l_w1&{c~6rB?7xX zK^Q54NM_03%E82mI<82N(j?mitx!?z={-UP5!OKLNO>y;Qtn~NV}pFxRFRh!I*e*F zj-LzPiE|HrhdWDFjFD$t(wQ|vdU8NT=4F1eFG+kDnd3Z0zusy!5<8_iamS$J`W!%0 zzttyZoFR&)#8!)62=$AozyyR)6nWe}sq{HSX1&Bf-ZBHkTt(bp7tJ0lmJVtQYn^6d zpnMrVSK17p-a8GIzbjqH#3x%OO|Rc5P$RBVDUPU?gzIarb=$n$Q~8P+uEj*P&V>OJ z!`UM>U*L_>UtMkcj)dNbkxr@pt! zUcR(mbwfpf)^@v?Ss!A<-o){9gT_gr@oefe0 zKE0u+=LH6%3dQDyXL*fcQHwKMUSsGr$G0CyghTYauXbVaSER<#=}_IPB_~;r$)T3)Ep!_`${XL0%3l zT2M}H^Sa+pa#(5Uh&4Z1LLL__gHt?Z{M)w~vMW7)4#-HdV6hdF6m?Z%Lv4idj(b9y z0qcrGB%E-c!$@B-DbWghk(~arC$FJgZbff~GT05ARYw^n?$i57R5UvhDd1ytDOrt! 
zZtbj14pv67FqFikgU7&-L;e6cbBihL!Fd@F*YOim9^H1+_?f*lp8=D0=)Pr9u5z{L)X6DkIkY6xmWF~-FGWfAEK zbf~Bw{3L|dfqG;&SI%y-+SUDpoq%EtOf(3l=gYodY3V_!ueI`fYJyyQA*VTt-2@(` zVeRH4^lw4;l*1wf1%ZfQoY*!4fyEnLP}ya*&TJg5?Ogn2LO6MZm`b&#=rz+sP{+3b zaZJTRBaBRY%jwi)6Ua=-oo-|N$J@Mvg;V?v&xe=L8py$ooMq;AeHC>rbssC3#a|_xX>MZ2Jucrn+i+TUuVwNQaBy8TimQH8y5Z}ic=};c zf7XT0pSWm-8G%T5Qu4T#lf+a~A>9$r0Wl_hO;>XJ zQ=vuIPKv0zZ+4n%E;q)q$6nG23HUSuqJ zZ+wV)WJ;hXG)hYbbAGr3d%mqcI&bsif&Z@4nlrtOF23@uJq!{V*~@%5L9NZGiDWe| zUo@eyX%!bHY(JNa#LKFyVBBVYPQJ)}N8Gdr@IfL-9+e8Q0nf_CZ7=Qc@$Pq9{ecPx zCJ?U?i-erT`2_sVxd#=ZW#&48z_jJ%i=-~E=q6dHE{t!q^7$$>2CxH?nYPA+B~sj@ zJK@LpSq6towtbwamE|cdomvYL!J+>E*^g$i>U1OrWVJrvjV)D9s4-5e^C?GL{J{*i z{`o$6d!JY4PXft^Jo*c2gmSXlU^{@7mcYzSj@G8T(1s@Tc9GF+kX?j4j<zc%4kw)qryg7|nd2%c z@Lkm!V&MgR96f&;Go?ZR9IDQM=VfOX zHF2^`W+psh2+L!Pe^1BPM@j~?{~C>8>Ly(crGntl8XQQ{Sw-Z=p)4k^0R$g9Qg0p) z<9V75C^#&Zc!@9VfrTPBH-*B)!7XbZwSY16h3t})5D+xO4wCW;5s&F1p)i@P8zzF! z&gB|hNt9oqXY7tIDI^rkI39-XgxIG6pMM2$_xLPSGufj$4s?Ikm~Z$N z+S*6hVn)7%ahh7}PTn-PVm0fs7xrl6TDt`Oqa$c6m*n^*Wj8vc7`K78WSVS?6cy|t zs7c%n7vpg+GntT-2y=MB?ONQn*!fism&NI-HLhRk>AEN?D{H_iw3Y;Jh0zbd+uAU4 zR3_u^#~gQrWbW2K1M9oAed=&wI+}F164TCWuzf_I6{U&W5lxcnKL?_?d6k6tld-XZ zGT#h}=Ij}Q`X&jBa(WbI00#53?1IfOi;xP*X_~%hTw9J_9;}fEuMp|`^T8IBUr61A z%EqZIk+|?Ubt`ln?HZ+p@DXqm?!vQW@O7v(k0s;{Y)HZ|BT6THpfB`9_j*yN_sNJr zK>&P0))75<6*cSEE`(KVZR*;RrhCM2w%`C;$D9a}v}g3vB^3{*>E-0sar$&oyqWC0 zM#Jr;`F#JpAgJeN;ByEdoUReX3C z*78{I>BoxulIB5GA;z}+BiCV?7#Vt5YQLb3Bv~U`k0VT?@UytGrv}#f(K-F#10m+R zpT}5C=y^+R;RU%%aNAnv_r&9bht|q7!Yur*M*Mqs--oLEJ%ru3;>KN9l#R~m*HE?F z1Mu^0Yj&9B9+l4?7#wP!Sit~MllFyGLs;Y!qfK_cMp|4YOjE_jpUP z44oV>gFDI0acaU0Maqw&-3EfzTS2IjmR0{WVVku)EJSOLKuEsWrKc(6?X6|acqI0rn?h786~NW>@_*Sr*%cvlM-egm_Vs8VjRQ|C zp?nN-Vgb(J#Pnu#@+ro_l%=)UsqT|$0`Wz5;j9D7h2qIhU!%y#8q-~-kW+%cR$cbH z^Xu(EHov))>h{)P+nMAZ24^X_l z1K7-~1-GcaI1g2A+3qC_U!2@H?HtPx>xP&(JeSaoFwK&mT~W{1ohck^49)Hr z2Q)H(5%cH13YgM&oE&ab^iI$-&=bnicV>Os=Y`f^XS;|g(c8!a4`Fc=uPj~EHx}j} zF!qp;E;+3o7to)4VW#Xhn^L?&Wa3X4zUXi_$CBGAkBTV_QRwXXY?7m#!Zhj0v;C8T 
zyLZT|;u*WF;9-JR-@j3T=F)WX>GM0}KWk&2CLqXPK8lfd5C8!De`;gS9@ZxRDvEWe z$=R(j!gX9|K($uas#F|8tPQXeE&EqePNOZW)%+4I;fG5_h<|%liZ|ks14HCHHB_1R zB72n_S=rE|sYR_yTHIspkfuZ~s!w7~&)~02CbXln2uKIgSc%Fb<`udPgjIwDp~Pqt z`+xm3x9`q0ZGEXF4#vMXD z+L;SR(o1o((G3A(oxsx-D-wm5w0Eq#v-<&mlMYbNSCZaWrvfR z|K|4fC7BWNIDg6zj5?7KR&2axRMR@R03}JdRD$qG!6$F7lcH!&240{%V`t_j zlQ)gh)^3o|p@%*J15+xPBZ_<7&694O8E<&zgom%H6MV7xt!!`5+<+n&(o853={(|7 zR{$aL8xDx(^OY#LPvHcBd^CA#=Mmgmf7ggwI?+6D@~&;3_{CGb=oXX*f2D(Y5i`D1 znSxJ)f=|i7D$`r2Y$;bX%>GsJI31VJ0nin~oHp8=it2GwN&Yv<%)k=rKaO??<4B=xCNJ5p~btw?CJz*z?#&{RaBa6vTQKSN#DF04U@ByBqyco^>`iu`&7g z^rf5{2iK?_p_LYK=v&Qm``jVkF?ugB39 zcu#aD7hGgi|%a{;P(BpQfqFbO5^ez^n&i>=gPTjI(mCkH^=wOR~lTo zCg7A1a{h=rbmo8)K`NJV{|Y>5JNV@yEeHms_E=RQzf!p)-QYvZAs7)Ny*jIqWm-J) zJ;>6)m7-z>ba(nED>opWKD~`eZR{<$oZ8wm<$xkG>~Q&kK)dG~T8HP`-rjayLYP1{ zuc!0%5rg?Xe5~_mgym>1>5ApQlF|7QN%gpT56um-IUs!IBWXuND%1X7hPm z-I@7$zb^xRVz`&|L9$YyY>*5c`VW0g7Xn}dZ9%3sE&?Mz^T>G?jE!Bq zFog>0n5`(aB;y7x6veE?dA^Hx@sz^t% zjM1JTakHXM+Ojmq=Z1%(gFh~6EOf+R#D*yL3r$D=#21jQ=_Y1v%!R4Ts+vx&~JB?Bv2YeSV)XZXSSO_ zNkn-5k;>qh_avJoZm$U}-x3bEu_9}JPANNN?4=5KaD>jK)eJ)^)ajM#)*9gvkXL5p z|9N9-CMamsFcf6imYWTKV12Dd=^_*r+0)vKqp>NUq5@b-+;=y%*i4!E)7lg`a4-4J_$EvmR&a>KLmL13LvrQ(^(at*hPCIP4 zvJI8=y(3p`L96gkPLjq0?cQ#^P1M);z%fp6UuG-8ln|-4Yc-2etB9xip8a}fOB(p9 zaLEu=o-g9yYQ9$Y$7Gs^`VZ^lh?qwHxww_ZV)?&*M`$ea+4cj#vEJ}k0mVm3WtPc!nNAitcV;&pC~?m0gHv_sF{NqSUapL zOzZVC<48Ih!>4>piM54M8In~rP`Fqnj>uR)$+a^yRb?r=nnm0sJGGoa{U~Rp=olyT_5E^hw<^VZk^m7SaQ$YSq-ycDL851Ht<-)}!e+zT zVMF^xDZO_a%tU{>x%6_yuoM3hESx`0ArT7YO1Yf2>uDd!+16svq~dLb$r3qm5$LYJ zcUj8SR@1v;7UAV&<^`eeW!AQ;N3W#KGrU(`D3JAd$v?aq5HuT1rP(g$36PjLKV<;h zGGdTydV~0q`yV7cH1OZqa($W_v zb{RaH!sI%HR*bi%z($m;(8jHXSQaI~6hx4U?fZZv{UW2+(i3IPl1JL@IR;xhIBu~D z`8JG+O28Fv@)kLH@+VRu7;t!5=W1syTKa5*pqaY z_Bw%gmSsd{C{az8v+nh*G0kzR;6GY^aA7S~UoQxA1anCbA`(;}b8n}Qm@37&Y_iXC zU~sZ0FJO>!4%rO}RM`&p%**HH-J1`6W^q`OrT(=F^_9(%9JA*%RH?>`X??yoJ2ZI) zmSQ~;FIn8S1x4xiEfel3>Dq3>x54i{fj<24U+GeF!AXB%Fht&DyR~QIq 
zh#zy|2mk&qS&C0a*|-;Mc61NlC}*b1S!BLV+2CYCMwN3OpvJ1rB7JH7G|M-onJTb+ zZl*MF|4a)T;#lv)FZAv!TWbXHjiLtVN1;Y>_6@!h2CT{96^U|(O9SyI^ClC+YjS)! zkdp|?g@1Zmg4?nmr{S_GAEnR`W__J)9zEMCx>RHn&jC6tZ0YVFiDUtWsFyiTW^AS! zb>A0zD3Q5Von3n&sOq-5 zor3OIL?K}R96Ht$MssEZAyBE~+w$Y^n0bz>2PUc;nu+u;bU05kcsw*3P5x>#XU32Hi8t5Wi4r3w(BvF)g%1wjuu~xqV17;Kh z=hSIRrjhZq7Tw5Mf-PX})Nz04`A2|Ca`noF+;8Fr));xk&Xf1#1~$*BD-%kueHB^Z zumK!cc*c;skCqlULk^mf;sY`a%!LB45zM4Q8T1gCuxc#B7e2*z1C3gdUtq%GZM5cFwnlR-?9V zI2BuSxspOw(-$+-di#M2GBt~%z0BZaS|4SsE}R_#)vmfEX5_*{^O*n&LKap+3oEDO z4(ecnV&hDpIF}*hS}s;Syrfd+<6|g1Aa~2iDOTk|H62zHE|g3@%KMYlGn#|ZFev{b z4f+x0j-fO!;}7WF)&m;fOMHdrIZkkl)i(7$kkf6RpBe`-7pZEqyyp-;X^!2q+rZ|Jzgg@uvRu z@elZG_+K2Re**kziu`vt0N@|s|MB+!heh(wz(2i%e+R~YOvC@TlkiWRKRtGTbUz83UOL;pO7{T&)j@UPH6Pi21s{JGBm4PZz1-+%r8x9a~1_@|uy4XF9~e+a>UiR+)? ke=5h{;gyvCBm6%)q9_CYQ3LtQkP8c#`A8gP>c4*dA0*u(ssI20 literal 0 HcmV?d00001 diff --git a/qa_bot/qa_bot.ipynb b/qa_bot/qa_bot.ipynb index a4386b4..a988abe 100644 --- a/qa_bot/qa_bot.ipynb +++ b/qa_bot/qa_bot.ipynb @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 497, "metadata": {}, "outputs": [ { @@ -43,64 +43,64 @@ "output_type": "stream", "text": [ "Requirement already satisfied: langchain[llms] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.0.278)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.0.20)\n", + "Requirement already satisfied: PyYAML>=5.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (6.0.1)\n", + "Requirement already satisfied: langsmith<0.1.0,>=0.0.21 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.31)\n", + "Requirement already satisfied: pydantic<3,>=1 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.10.12)\n", + "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.0.3)\n", + "Requirement already satisfied: requests<3,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.31.0)\n", "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (8.2.3)\n", + "Requirement already satisfied: numpy<2,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.25.2)\n", "Requirement already satisfied: numexpr<3.0.0,>=2.8.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.8.5)\n", - "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.0.3)\n", "Requirement already satisfied: dataclasses-json<0.6.0,>=0.5.7 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.5.14)\n", "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (3.8.5)\n", - "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.0.20)\n", - "Requirement already satisfied: langsmith<0.1.0,>=0.0.21 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.31)\n", - "Requirement already satisfied: numpy<2,>=1 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.25.2)\n", - "Requirement already satisfied: requests<3,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.31.0)\n", - "Requirement already satisfied: pydantic<3,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.10.12)\n", - "Requirement already satisfied: PyYAML>=5.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (6.0.1)\n", - "Requirement already satisfied: clarifai>=9.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (9.7.6)\n", - "Requirement already satisfied: huggingface_hub<1,>=0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.16.4)\n", "Requirement already satisfied: openai<1,>=0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.28.0)\n", + "Requirement already satisfied: torch<3,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.0.1)\n", "Requirement already satisfied: openlm<0.0.6,>=0.0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.5)\n", - "Requirement already satisfied: nlpcloud<2,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.1.44)\n", + "Requirement already satisfied: cohere<5,>=4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.21)\n", "Requirement already satisfied: manifest-ml<0.0.2,>=0.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
langchain[llms]) (0.0.1)\n", + "Requirement already satisfied: nlpcloud<2,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.1.44)\n", + "Requirement already satisfied: clarifai>=9.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (9.7.6)\n", + "Requirement already satisfied: huggingface_hub<1,>=0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.16.4)\n", "Requirement already satisfied: transformers<5,>=4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.32.1)\n", - "Requirement already satisfied: torch<3,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.0.1)\n", - "Requirement already satisfied: cohere<5,>=4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.21)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.3.1)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.4.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (6.0.4)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.9.2)\n", "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (3.2.0)\n", "Requirement already satisfied: attrs>=17.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (23.1.0)\n", - "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (23.1)\n", - "Requirement already satisfied: tritonclient==2.34.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (2.34.0)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.9.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.4.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (6.0.4)\n", "Requirement already satisfied: clarifai-grpc>=9.7.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (9.7.6)\n", "Requirement already satisfied: rich==13.4.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (13.4.2)\n", + "Requirement already satisfied: tritonclient==2.34.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (2.34.0)\n", "Collecting tqdm==4.64.1\n", " Using cached tqdm-4.64.1-py2.py3-none-any.whl (78 kB)\n", + "Requirement already 
satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (23.1)\n", "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (2.16.1)\n", "Requirement already satisfied: python-rapidjson>=0.9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tritonclient==2.34.0->clarifai>=9.1.0->langchain[llms]) (1.10)\n", "Requirement already satisfied: importlib_metadata<7.0,>=6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (6.8.0)\n", - "Requirement already satisfied: fastavro==1.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (1.8.2)\n", "Requirement already satisfied: urllib3<3,>=1.26 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (2.0.4)\n", "Requirement already satisfied: backoff<3.0,>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (2.2.1)\n", - "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (3.20.1)\n", + "Requirement already satisfied: fastavro==1.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (1.8.2)\n", "Requirement already satisfied: 
typing-inspect<1,>=0.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (0.9.0)\n", - "Requirement already satisfied: filelock in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (3.12.3)\n", - "Requirement already satisfied: fsspec in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (2023.6.0)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (3.20.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (4.7.1)\n", + "Requirement already satisfied: fsspec in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (2023.6.0)\n", + "Requirement already satisfied: filelock in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (3.12.3)\n", + "Requirement already satisfied: dill>=0.3.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (0.3.7)\n", "Requirement already satisfied: sqlitedict>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (2.1.0)\n", "Requirement already satisfied: redis>=4.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (5.0.0)\n", - "Requirement already satisfied: dill>=0.3.5 
in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (0.3.7)\n", "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests<3,>=2->langchain[llms]) (3.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests<3,>=2->langchain[llms]) (2023.7.22)\n", "Requirement already satisfied: greenlet!=0.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain[llms]) (2.0.2)\n", "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (3.1.2)\n", - "Requirement already satisfied: networkx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (3.1)\n", "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (1.12)\n", - "Requirement already satisfied: safetensors>=0.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (0.3.3)\n", - "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (0.13.3)\n", + "Requirement already satisfied: networkx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (3.1)\n", "Requirement already satisfied: regex!=2019.12.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
transformers<5,>=4->langchain[llms]) (2023.8.8)\n", - "Requirement already satisfied: protobuf>=3.20.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (4.24.2)\n", + "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (0.13.3)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (0.3.3)\n", "Requirement already satisfied: grpcio>=1.44.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (1.57.0)\n", + "Requirement already satisfied: protobuf>=3.20.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (4.24.2)\n", "Requirement already satisfied: googleapis-common-protos>=1.53.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (1.60.0)\n", "Requirement already satisfied: zipp>=0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from importlib_metadata<7.0,>=6.0->cohere<5,>=4->langchain[llms]) (3.16.2)\n", "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (1.0.0)\n", @@ -119,42 +119,42 @@ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> 
\u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: Scrapy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (2.10.1)\n", - "Requirement already satisfied: tldextract in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (3.4.4)\n", - "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.1)\n", - "Requirement already satisfied: PyDispatcher>=2.0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (2.0.7)\n", + "Requirement already satisfied: itemloaders>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.1.0)\n", "Requirement already satisfied: cssselect>=0.9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.2.0)\n", - "Requirement already satisfied: Twisted<23.8.0,>=18.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (22.10.0)\n", - "Requirement already satisfied: service-identity>=18.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.1.0)\n", - "Requirement already satisfied: pyOpenSSL>=21.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.2.0)\n", + "Requirement already satisfied: setuptools in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (63.2.0)\n", + "Requirement already satisfied: lxml>=4.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (4.9.3)\n", 
"Requirement already satisfied: w3lib>=1.17.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (2.1.2)\n", + "Requirement already satisfied: cryptography>=36.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (41.0.3)\n", + "Requirement already satisfied: PyDispatcher>=2.0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (2.0.7)\n", + "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.1)\n", + "Requirement already satisfied: zope.interface>=5.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (6.0)\n", + "Requirement already satisfied: pyOpenSSL>=21.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.2.0)\n", + "Requirement already satisfied: service-identity>=18.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.1.0)\n", + "Requirement already satisfied: tldextract in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (3.4.4)\n", "Requirement already satisfied: protego>=0.1.15 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (0.3.0)\n", "Requirement already satisfied: queuelib>=1.4.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.6.2)\n", - "Requirement already satisfied: cryptography>=36.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (41.0.3)\n", - "Requirement already satisfied: setuptools in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (63.2.0)\n", "Requirement already 
satisfied: parsel>=1.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.8.1)\n", - "Requirement already satisfied: zope.interface>=5.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (6.0)\n", - "Requirement already satisfied: itemloaders>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.1.0)\n", "Requirement already satisfied: itemadapter>=0.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (0.8.0)\n", - "Requirement already satisfied: lxml>=4.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (4.9.3)\n", + "Requirement already satisfied: Twisted<23.8.0,>=18.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (22.10.0)\n", "Requirement already satisfied: cffi>=1.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cryptography>=36.0.0->Scrapy) (1.15.1)\n", "Requirement already satisfied: jmespath>=0.9.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from itemloaders>=1.0.1->Scrapy) (1.0.1)\n", - "Requirement already satisfied: pyasn1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (0.5.0)\n", "Requirement already satisfied: pyasn1-modules in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (0.3.0)\n", + "Requirement already satisfied: pyasn1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (0.5.0)\n", "Requirement already satisfied: attrs>=19.1.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (23.1.0)\n", + "Requirement already satisfied: hyperlink>=17.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (21.0.0)\n", "Requirement already satisfied: constantly>=15.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (15.1.0)\n", "Requirement already satisfied: typing-extensions>=3.6.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (4.7.1)\n", "Requirement already satisfied: Automat>=0.8.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (22.10.0)\n", - "Requirement already satisfied: hyperlink>=17.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (21.0.0)\n", "Requirement already satisfied: incremental>=21.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (22.10.0)\n", - "Requirement already satisfied: filelock>=3.0.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (3.12.3)\n", - "Requirement already satisfied: idna in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (3.4)\n", "Requirement already satisfied: requests>=2.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (2.31.0)\n", + "Requirement already satisfied: filelock>=3.0.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (3.12.3)\n", "Requirement 
already satisfied: requests-file>=1.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (1.5.1)\n", + "Requirement already satisfied: idna in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (3.4)\n", "Requirement already satisfied: six in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Automat>=0.8.0->Twisted<23.8.0,>=18.9.0->Scrapy) (1.16.0)\n", "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=36.0.0->Scrapy) (2.21)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (2.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (2023.7.22)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (3.2.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (2023.7.22)\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", @@ -171,80 +171,80 @@ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A 
new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: unstructured[all-docs] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.10.11)\n", + "Requirement already satisfied: lxml in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (4.9.3)\n", "Requirement already satisfied: nltk in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.8.1)\n", + "Requirement already satisfied: tabulate in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.9.0)\n", + "Requirement already satisfied: dataclasses-json in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.5.14)\n", + "Requirement already satisfied: python-magic in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.4.27)\n", "Requirement already satisfied: filetype in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.2.0)\n", "Requirement already satisfied: chardet in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (5.2.0)\n", - "Requirement already satisfied: beautifulsoup4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (4.12.2)\n", "Requirement already satisfied: requests in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) 
(2.31.0)\n", - "Requirement already satisfied: python-magic in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.4.27)\n", "Requirement already satisfied: emoji in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.8.0)\n", - "Requirement already satisfied: tabulate in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.9.0)\n", - "Requirement already satisfied: dataclasses-json in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.5.14)\n", - "Requirement already satisfied: lxml in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (4.9.3)\n", - "Requirement already satisfied: openpyxl in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.1.2)\n", - "Requirement already satisfied: python-docx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.8.11)\n", - "Requirement already satisfied: ebooklib in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.18)\n", - "Requirement already satisfied: msg-parser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.2.0)\n", - "Requirement already satisfied: unstructured-inference in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.5.19)\n", + "Requirement already satisfied: beautifulsoup4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (4.12.2)\n", + "Requirement already satisfied: pypandoc in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.11)\n", + "Requirement already satisfied: pdf2image in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.16.3)\n", "Requirement already satisfied: Pillow<10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (9.5.0)\n", + "Requirement already satisfied: python-docx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.8.11)\n", "Requirement already satisfied: pdfminer.six in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (20221105)\n", - "Requirement already satisfied: pdf2image in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.16.3)\n", - "Requirement already satisfied: markdown in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.4.4)\n", - "Requirement already satisfied: pypandoc in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.11)\n", - "Requirement already satisfied: python-pptx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.6.22)\n", "Requirement already satisfied: pandas in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.1.0)\n", + "Requirement already satisfied: python-pptx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.6.22)\n", "Requirement already satisfied: xlrd in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages 
(from unstructured[all-docs]) (2.0.1)\n", + "Requirement already satisfied: msg-parser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.2.0)\n", + "Requirement already satisfied: openpyxl in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.1.2)\n", + "Requirement already satisfied: markdown in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.4.4)\n", + "Requirement already satisfied: ebooklib in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.18)\n", + "Requirement already satisfied: unstructured-inference in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.5.19)\n", "Requirement already satisfied: soupsieve>1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from beautifulsoup4->unstructured[all-docs]) (2.4.1)\n", - "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json->unstructured[all-docs]) (3.20.1)\n", "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json->unstructured[all-docs]) (0.9.0)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json->unstructured[all-docs]) (3.20.1)\n", "Requirement already satisfied: six in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ebooklib->unstructured[all-docs]) (1.16.0)\n", "Requirement already satisfied: olefile>=0.46 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from msg-parser->unstructured[all-docs]) (0.46)\n", - "Requirement already satisfied: joblib in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (1.3.2)\n", + "Requirement already satisfied: tqdm in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (4.64.1)\n", "Requirement already satisfied: regex>=2021.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (2023.8.8)\n", + "Requirement already satisfied: joblib in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (1.3.2)\n", "Requirement already satisfied: click in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (8.1.7)\n", - "Requirement already satisfied: tqdm in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (4.64.1)\n", "Requirement already satisfied: et-xmlfile in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from openpyxl->unstructured[all-docs]) (1.1.0)\n", - "Requirement already satisfied: numpy>=1.22.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (1.25.2)\n", + "Requirement already satisfied: tzdata>=2022.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2023.3)\n", "Requirement already satisfied: pytz>=2020.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2023.3)\n", + "Requirement already satisfied: numpy>=1.22.4 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (1.25.2)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2.8.2)\n", - "Requirement already satisfied: tzdata>=2022.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2023.3)\n", "Requirement already satisfied: charset-normalizer>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pdfminer.six->unstructured[all-docs]) (3.2.0)\n", "Requirement already satisfied: cryptography>=36.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pdfminer.six->unstructured[all-docs]) (41.0.3)\n", "Requirement already satisfied: XlsxWriter>=0.5.7 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from python-pptx->unstructured[all-docs]) (3.1.2)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (2.0.4)\n", "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (3.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (2023.7.22)\n", - "Requirement already satisfied: opencv-python!=4.7.0.68 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (4.8.0.76)\n", - "Requirement already satisfied: onnxruntime in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (1.15.1)\n", "Requirement already satisfied: huggingface-hub in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.16.4)\n", - "Requirement already satisfied: python-multipart in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.0.6)\n", - "Requirement already satisfied: transformers>=4.25.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (4.32.1)\n", "Requirement already satisfied: layoutparser[layoutmodels,tesseract] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.3.4)\n", + "Requirement already satisfied: transformers>=4.25.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (4.32.1)\n", + "Requirement already satisfied: onnxruntime in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (1.15.1)\n", + "Requirement already satisfied: python-multipart in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.0.6)\n", + "Requirement already satisfied: opencv-python!=4.7.0.68 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (4.8.0.76)\n", "Requirement already satisfied: cffi>=1.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
cryptography>=36.0.0->pdfminer.six->unstructured[all-docs]) (1.15.1)\n", "Requirement already satisfied: packaging>=17.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->unstructured[all-docs]) (23.1)\n", - "Requirement already satisfied: pyyaml>=5.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (6.0.1)\n", "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (0.13.3)\n", - "Requirement already satisfied: filelock in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (3.12.3)\n", "Requirement already satisfied: safetensors>=0.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (0.3.3)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface-hub->unstructured-inference->unstructured[all-docs]) (4.7.1)\n", + "Requirement already satisfied: filelock in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (3.12.3)\n", + "Requirement already satisfied: pyyaml>=5.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (6.0.1)\n", "Requirement already satisfied: fsspec in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface-hub->unstructured-inference->unstructured[all-docs]) (2023.6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface-hub->unstructured-inference->unstructured[all-docs]) (4.7.1)\n", "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json->unstructured[all-docs]) (1.0.0)\n", - "Requirement already satisfied: pdfplumber in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.10.2)\n", "Requirement already satisfied: scipy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.11.2)\n", + "Requirement already satisfied: pdfplumber in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.10.2)\n", "Requirement already satisfied: iopath in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.1.10)\n", "Requirement already satisfied: pytesseract in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.3.10)\n", "Requirement already satisfied: effdet in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.4.1)\n", - "Requirement already satisfied: torchvision in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.15.2)\n", "Requirement already satisfied: torch in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.0.1)\n", + "Requirement already satisfied: torchvision in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.15.2)\n", + "Requirement already satisfied: flatbuffers in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (23.5.26)\n", "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (1.12)\n", - "Requirement already satisfied: protobuf in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (4.24.2)\n", "Requirement already satisfied: coloredlogs in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (15.0.1)\n", - "Requirement already satisfied: flatbuffers in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (23.5.26)\n", + "Requirement already satisfied: protobuf in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
onnxruntime->unstructured-inference->unstructured[all-docs]) (4.24.2)\n", "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six->unstructured[all-docs]) (2.21)\n", "Requirement already satisfied: humanfriendly>=9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from coloredlogs->onnxruntime->unstructured-inference->unstructured[all-docs]) (10.0)\n", + "Requirement already satisfied: timm>=0.9.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.9.6)\n", "Requirement already satisfied: pycocotools>=2.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.0.7)\n", "Requirement already satisfied: omegaconf>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.3.0)\n", - "Requirement already satisfied: timm>=0.9.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.9.6)\n", "Requirement already satisfied: networkx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.1)\n", "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.1.2)\n", 
"Requirement already satisfied: portalocker in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from iopath->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.7.0)\n", @@ -253,21 +253,21 @@ "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from omegaconf>=2.0->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.9.3)\n", "Requirement already satisfied: matplotlib>=2.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.7.2)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jinja2->torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.1.3)\n", - "Requirement already satisfied: cycler>=0.10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.11.0)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.4.5)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.42.1)\n", "Requirement already satisfied: pyparsing<3.1,>=2.3.1 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.0.9)\n", "Requirement already satisfied: contourpy>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.1.0)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.42.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.4.5)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.11.0)\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: tiktoken in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.4.0)\n", - "Requirement already satisfied: regex>=2022.1.18 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tiktoken) (2023.8.8)\n", "Requirement already satisfied: requests>=2.26.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tiktoken) (2.31.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (3.2.0)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (2.0.4)\n", + "Requirement already satisfied: regex>=2022.1.18 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tiktoken) (2023.8.8)\n", "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (2.0.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (2023.7.22)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (3.2.0)\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", @@ -282,144 +282,144 @@ 
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: notebook in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (7.0.3)\n", - "Requirement already satisfied: tornado>=6.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (6.3.3)\n", "Requirement already satisfied: notebook-shim<0.3,>=0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (0.2.3)\n", + "Requirement already satisfied: jupyterlab<5,>=4.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (4.0.5)\n", + "Requirement already satisfied: tornado>=6.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (6.3.3)\n", "Requirement already satisfied: jupyterlab-server<3,>=2.22.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (2.24.0)\n", "Requirement already satisfied: jupyter-server<3,>=2.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (2.7.3)\n", - "Requirement already satisfied: jupyterlab<5,>=4.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (4.0.5)\n", - "Requirement already satisfied: jupyter-events>=0.6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.7.0)\n", - "Requirement already satisfied: anyio>=3.1.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (4.0.0)\n", + "Requirement already satisfied: nbformat>=5.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.9.2)\n", + "Requirement already satisfied: websocket-client in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (1.6.2)\n", + "Requirement already satisfied: terminado>=0.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.17.1)\n", "Requirement already satisfied: pyzmq>=24 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (25.1.1)\n", - "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (3.1.2)\n", "Requirement already satisfied: prometheus-client in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.17.1)\n", - "Requirement already satisfied: websocket-client in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (1.6.2)\n", + "Requirement already satisfied: argon2-cffi in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (23.1.0)\n", + "Requirement already satisfied: jupyter-server-terminals in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.4.4)\n", + "Requirement already satisfied: nbconvert>=6.4.4 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (7.8.0)\n", + "Requirement already satisfied: overrides in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (7.4.0)\n", + "Requirement already satisfied: jupyter-events>=0.6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.7.0)\n", + "Requirement already satisfied: anyio>=3.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (4.0.0)\n", + "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (23.1)\n", "Requirement already satisfied: traitlets>=5.6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.9.0)\n", "Requirement already satisfied: jupyter-client>=7.4.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (8.3.1)\n", - "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (23.1)\n", - "Requirement already satisfied: terminado>=0.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.17.1)\n", "Requirement already satisfied: send2trash>=1.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (1.8.2)\n", - "Requirement already satisfied: jupyter-server-terminals in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.4.4)\n", - "Requirement already satisfied: nbformat>=5.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.9.2)\n", - "Requirement already satisfied: overrides in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (7.4.0)\n", - "Requirement already satisfied: argon2-cffi in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (23.1.0)\n", - "Requirement already satisfied: nbconvert>=6.4.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (7.8.0)\n", "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.3.1)\n", - "Requirement already satisfied: jupyter-lsp>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.2.0)\n", - "Requirement already satisfied: ipykernel in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (6.25.1)\n", + "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (3.1.2)\n", "Requirement already satisfied: async-lru>=1.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.0.4)\n", "Requirement already satisfied: tomli in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
jupyterlab<5,>=4.0.2->notebook) (2.0.1)\n", - "Requirement already satisfied: json5>=0.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (0.9.14)\n", + "Requirement already satisfied: jupyter-lsp>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.2.0)\n", + "Requirement already satisfied: ipykernel in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (6.25.1)\n", "Requirement already satisfied: jsonschema>=4.17.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (4.19.0)\n", - "Requirement already satisfied: babel>=2.10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (2.12.1)\n", + "Requirement already satisfied: json5>=0.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (0.9.14)\n", "Requirement already satisfied: requests>=2.28 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (2.31.0)\n", - "Requirement already satisfied: sniffio>=1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (1.3.0)\n", - "Requirement already satisfied: idna>=2.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (3.4)\n", + "Requirement already satisfied: babel>=2.10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (2.12.1)\n", "Requirement 
already satisfied: exceptiongroup>=1.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (1.1.3)\n", + "Requirement already satisfied: idna>=2.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (3.4)\n", + "Requirement already satisfied: sniffio>=1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (1.3.0)\n", "Requirement already satisfied: typing-extensions>=4.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from async-lru>=1.0.0->jupyterlab<5,>=4.0.2->notebook) (4.7.1)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jinja2->jupyter-server<3,>=2.4.0->notebook) (2.1.3)\n", "Requirement already satisfied: referencing>=0.28.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (0.30.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (23.1.0)\n", "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (2023.7.1)\n", "Requirement already satisfied: rpds-py>=0.7.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (0.10.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (23.1.0)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-client>=7.4.4->jupyter-server<3,>=2.4.0->notebook) (2.8.2)\n", "Requirement already satisfied: platformdirs>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-core!=5.0.*,>=4.12->jupyter-server<3,>=2.4.0->notebook) (3.10.0)\n", "Requirement already satisfied: rfc3339-validator in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (0.1.4)\n", "Requirement already satisfied: python-json-logger>=2.0.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (2.0.7)\n", - "Requirement already satisfied: pyyaml>=5.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (6.0.1)\n", "Requirement already satisfied: rfc3986-validator>=0.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (0.1.1)\n", - "Requirement already satisfied: mistune<4,>=2.0.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (3.0.1)\n", - "Requirement already satisfied: beautifulsoup4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (4.12.2)\n", - "Requirement already satisfied: bleach!=5.0.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (6.0.0)\n", - "Requirement already satisfied: jupyterlab-pygments in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.2.2)\n", + "Requirement already satisfied: pyyaml>=5.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (6.0.1)\n", "Requirement already satisfied: defusedxml in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.7.1)\n", - "Requirement already satisfied: pandocfilters>=1.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.5.0)\n", - "Requirement already satisfied: tinycss2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.2.1)\n", "Requirement already satisfied: nbclient>=0.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.8.0)\n", + "Requirement already satisfied: tinycss2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.2.1)\n", + "Requirement already satisfied: pandocfilters>=1.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.5.0)\n", + "Requirement already satisfied: jupyterlab-pygments in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.2.2)\n", + "Requirement already satisfied: beautifulsoup4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (4.12.2)\n", "Requirement already satisfied: pygments>=2.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (2.16.1)\n", + "Requirement already satisfied: bleach!=5.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (6.0.0)\n", + "Requirement already satisfied: mistune<4,>=2.0.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (3.0.1)\n", "Requirement already satisfied: fastjsonschema in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbformat>=5.3.0->jupyter-server<3,>=2.4.0->notebook) (2.18.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (2023.7.22)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (2.0.4)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (3.2.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (2023.7.22)\n", "Requirement 
already satisfied: ptyprocess in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from terminado>=0.8.3->jupyter-server<3,>=2.4.0->notebook) (0.7.0)\n", "Requirement already satisfied: argon2-cffi-bindings in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (21.2.0)\n", + "Requirement already satisfied: matplotlib-inline>=0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.6)\n", + "Requirement already satisfied: appnope in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.3)\n", "Requirement already satisfied: debugpy>=1.6.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.6.7.post1)\n", "Requirement already satisfied: ipython>=7.23.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (8.14.0)\n", - "Requirement already satisfied: matplotlib-inline>=0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.6)\n", + "Requirement already satisfied: comm>=0.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.4)\n", "Requirement already satisfied: psutil in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (5.9.5)\n", "Requirement already satisfied: nest-asyncio in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.5.7)\n", - "Requirement 
already satisfied: comm>=0.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.4)\n", - "Requirement already satisfied: appnope in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.3)\n", "Requirement already satisfied: six>=1.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.16.0)\n", "Requirement already satisfied: webencodings in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.5.1)\n", + "Requirement already satisfied: backcall in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.0)\n", + "Requirement already satisfied: pickleshare in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.7.5)\n", "Requirement already satisfied: stack-data in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.6.2)\n", - "Requirement already satisfied: jedi>=0.16 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.19.0)\n", "Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (3.0.39)\n", + "Requirement already satisfied: jedi>=0.16 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.19.0)\n", "Requirement already satisfied: pexpect>4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (4.8.0)\n", - "Requirement already satisfied: pickleshare in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.7.5)\n", - "Requirement already satisfied: backcall in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.0)\n", "Requirement already satisfied: decorator in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (5.1.1)\n", - "Requirement already satisfied: uri-template in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.3.0)\n", - "Requirement already satisfied: jsonpointer>1.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (2.4)\n", - "Requirement already satisfied: fqdn in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.5.1)\n", "Requirement already satisfied: isoduration in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (20.11.0)\n", "Requirement already satisfied: webcolors>=1.11 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.13)\n", + "Requirement already satisfied: jsonpointer>1.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (2.4)\n", + "Requirement already satisfied: fqdn in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.5.1)\n", + "Requirement already satisfied: uri-template in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.3.0)\n", "Requirement already satisfied: cffi>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (1.15.1)\n", "Requirement already satisfied: soupsieve>1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from beautifulsoup4->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (2.4.1)\n", "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (2.21)\n", "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.8.3)\n", "Requirement already satisfied: wcwidth in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.6)\n", "Requirement already satisfied: arrow>=0.15.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
isoduration->jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.2.3)\n", - "Requirement already satisfied: pure-eval in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.2)\n", "Requirement already satisfied: asttokens>=2.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (2.2.1)\n", + "Requirement already satisfied: pure-eval in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.2)\n", "Requirement already satisfied: executing>=1.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.2.0)\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: chromadb in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.4.8)\n", "Requirement already satisfied: pydantic<2.0,>=1.9 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.10.12)\n", - "Requirement already satisfied: chroma-hnswlib==0.7.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.7.2)\n", - "Requirement already satisfied: uvicorn[standard]>=0.18.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages 
(from chromadb) (0.23.2)\n", + "Requirement already satisfied: requests>=2.28 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (2.31.0)\n", + "Requirement already satisfied: pypika>=0.48.9 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.48.9)\n", + "Requirement already satisfied: pulsar-client>=3.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (3.3.0)\n", "Requirement already satisfied: numpy>=1.21.6 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.25.2)\n", "Requirement already satisfied: tokenizers>=0.13.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.13.3)\n", - "Requirement already satisfied: pypika>=0.48.9 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.48.9)\n", - "Requirement already satisfied: overrides>=7.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (7.4.0)\n", "Collecting tqdm>=4.65.0\n", " Using cached tqdm-4.66.1-py3-none-any.whl (78 kB)\n", - "Requirement already satisfied: onnxruntime>=1.14.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.15.1)\n", - "Requirement already satisfied: posthog>=2.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (3.0.2)\n", "Requirement already satisfied: importlib-resources in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (6.0.1)\n", - "Requirement already satisfied: requests>=2.28 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (2.31.0)\n", - "Requirement already satisfied: 
pulsar-client>=3.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (3.3.0)\n", + "Requirement already satisfied: bcrypt>=4.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (4.0.1)\n", + "Requirement already satisfied: uvicorn[standard]>=0.18.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.23.2)\n", "Requirement already satisfied: fastapi<0.100.0,>=0.95.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.99.1)\n", + "Requirement already satisfied: onnxruntime>=1.14.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.15.1)\n", "Requirement already satisfied: typing-extensions>=4.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (4.7.1)\n", - "Requirement already satisfied: bcrypt>=4.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (4.0.1)\n", + "Requirement already satisfied: chroma-hnswlib==0.7.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.7.2)\n", + "Requirement already satisfied: posthog>=2.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (3.0.2)\n", + "Requirement already satisfied: overrides>=7.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (7.4.0)\n", "Requirement already satisfied: starlette<0.28.0,>=0.27.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from fastapi<0.100.0,>=0.95.2->chromadb) (0.27.0)\n", - "Requirement already satisfied: protobuf in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages 
(from onnxruntime>=1.14.1->chromadb) (4.24.2)\n", - "Requirement already satisfied: coloredlogs in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (15.0.1)\n", - "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (1.12)\n", "Requirement already satisfied: flatbuffers in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (23.5.26)\n", + "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (1.12)\n", + "Requirement already satisfied: coloredlogs in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (15.0.1)\n", "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (23.1)\n", + "Requirement already satisfied: protobuf in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (4.24.2)\n", "Requirement already satisfied: python-dateutil>2.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (2.8.2)\n", - "Requirement already satisfied: monotonic>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (1.6)\n", "Requirement already satisfied: backoff>=1.10.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (2.2.1)\n", "Requirement already satisfied: six>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
posthog>=2.4.0->chromadb) (1.16.0)\n", + "Requirement already satisfied: monotonic>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (1.6)\n", "Requirement already satisfied: certifi in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pulsar-client>=3.1.0->chromadb) (2023.7.22)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.2.0)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (2.0.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.2.0)\n", "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.4)\n", - "Requirement already satisfied: h11>=0.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.14.0)\n", "Requirement already satisfied: click>=7.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (8.1.7)\n", - "Requirement already satisfied: httptools>=0.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.6.0)\n", - "Requirement already satisfied: pyyaml>=5.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (6.0.1)\n", + "Requirement already satisfied: h11>=0.8 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.14.0)\n", "Requirement already satisfied: python-dotenv>=0.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (1.0.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (6.0.1)\n", "Requirement already satisfied: watchfiles>=0.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.20.0)\n", + "Requirement already satisfied: httptools>=0.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.6.0)\n", "Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.17.0)\n", "Requirement already satisfied: websockets>=10.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (11.0.3)\n", "Requirement already satisfied: anyio<5,>=3.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (4.0.0)\n", "Requirement already satisfied: humanfriendly>=9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from coloredlogs->onnxruntime>=1.14.1->chromadb) (10.0)\n", "Requirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from sympy->onnxruntime>=1.14.1->chromadb) (1.3.0)\n", - "Requirement already satisfied: sniffio>=1.1 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (1.3.0)\n", "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (1.1.3)\n", + "Requirement already satisfied: sniffio>=1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (1.3.0)\n", "Installing collected packages: tqdm\n", " Attempting uninstall: tqdm\n", " Found existing installation: tqdm 4.64.1\n", @@ -432,13 +432,21 @@ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: pandas in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (2.1.0)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (2023.3)\n", + "Requirement already satisfied: tzdata>=2022.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (2023.3)\n", "Requirement already satisfied: numpy>=1.22.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (1.25.2)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (2.8.2)\n", - "Requirement already 
satisfied: tzdata>=2022.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (2023.3)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (2023.3)\n", "Requirement already satisfied: six>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Collecting rank_bm25\n", + " Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)\n", + "Requirement already satisfied: numpy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from rank_bm25) (1.25.2)\n", + "Installing collected packages: rank_bm25\n", + "Successfully installed rank_bm25-0.2.2\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" ] } @@ -457,7 +465,8 @@ "!pip install GitPython\n", "!pip install notebook\n", "!pip install chromadb\n", - "!pip install pandas" + "!pip install pandas\n", + "!pip install rank_bm25" ] }, { @@ -685,7 +694,7 @@ }, { "cell_type": "code", - "execution_count": 322, + "execution_count": 465, "metadata": {}, "outputs": [ { @@ -972,14 +981,14 @@ }, { "cell_type": "code", - "execution_count": 228, + 
"execution_count": 574, "metadata": {}, "outputs": [], "source": [ "import yaml\n", "\n", "# load yaml file\n", - "with open('knowledge_base/c4/c4_test_qa.yaml') as file:\n", + "with open('knowledge_base/c4/c4_mava_correct_ans_set.yaml') as file:\n", " # The FullLoader parameter handles the conversion from YAML\n", " # scalar values to Python the dictionary format\n", " yaml_data = yaml.load(file, Loader=yaml.FullLoader)\n", @@ -1384,54 +1393,36 @@ "\n", "\n", "def call_hyde_llm(question):\n", - " result = qa({\"query\": question})\n", - " result['rephrased_question'] = None\n", - " return result\n", - "\n", - "def ask_hyde(question):\n", - " result = call_hyde_llm(question)\n", - " display_result(question, result)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### MultiQuery approach" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# from langchain.chat_models import ChatOpenAI\n", - "# from langchain.retrievers.multi_query import MultiQueryRetriever\n", - "\n", - "# question = \"What are scout awards?\"\n", - "# llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", - "# multiquery_retriever = MultiQueryRetriever.from_llm(\n", - "# retriever=vectorstore.as_retriever(), llm=llm\n", - "# )\n", - "# import logging\n", - "\n", - "# logging.basicConfig()\n", - "# logging.getLogger(\"langchain.retrievers.multi_query\").setLevel(logging.INFO)" + " result = qa({\"query\": question})\n", + " result['rephrased_question'] = None\n", + " return result\n", + "\n", + "def ask_hyde(question):\n", + " result = call_hyde_llm(question)\n", + " display_result(question, result)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Final Implementation" + "#### Vector Store with Sources" ] }, { "cell_type": "code", - "execution_count": 325, + "execution_count": 467, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", 
+ "text": [ + "INFO:chromadb.telemetry.posthog:Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n", + "WARNING:langchain.embeddings.openai:Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised APIError: OpenAI API returned an empty embedding.\n" + ] + } + ], "source": [ "from langchain.embeddings import OpenAIEmbeddings\n", "from langchain.vectorstores import Chroma\n", @@ -1439,7 +1430,7 @@ "# NOTE: At times, OpenAI Embedding service can fail intermittently and return errorneous values such as [NaN], more info: https://github.com/langchain-ai/langchain/pull/7070\n", "\n", "embeddings = OpenAIEmbeddings()\n", - "vectorstore_with_sources = Chroma(\"vectorstore_with_sources3\", embeddings, collection_metadata={\"hnsw:space\": \"cosine\"})\n", + "vectorstore_with_sources = Chroma(\"vectorstore_with_sources6\", embeddings, collection_metadata={\"hnsw:space\": \"cosine\"})\n", "\n", "for i, d in enumerate(website_chunks):\n", " d.metadata['source'] = f\"w{i}-pl\"\n", @@ -1452,16 +1443,69 @@ " vectorstore_with_sources.add_documents([d])" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### MultiQuery approach" + ] + }, { "cell_type": "code", - "execution_count": 375, + "execution_count": 479, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.retrievers.multi_query import MultiQueryRetriever\n", + "\n", + "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", + "multiquery_retriever = MultiQueryRetriever.from_llm(\n", + " retriever=vectorstore_with_sources.as_retriever(), llm=llm\n", + ")\n", + "import logging\n", + "\n", + "logging.basicConfig()\n", + "logging.getLogger(\"langchain.retrievers.multi_query\").setLevel(logging.ERROR)" + ] + }, + { + "cell_type": "code", + "execution_count": 541, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.retrievers 
import BM25Retriever, EnsembleRetriever\n", + "\n", + "lowercased_website_chunks = []\n", + "for d in website_chunks:\n", + " dd = d.copy()\n", + " dd.page_content = d.page_content.lower()\n", + " lowercased_website_chunks.append(dd)\n", + "\n", + "\n", + "lowercased_gh_docs_chunks = []\n", + "for d in gh_docs_chunks:\n", + " dd = d.copy()\n", + " dd.page_content = d.page_content.lower()\n", + " lowercased_gh_docs_chunks.append(dd)\n", + "\n", + "bm25_retriever = BM25Retriever.from_documents(lowercased_website_chunks + lowercased_gh_docs_chunks)\n", + "bm25_retriever.k = 2\n", + "\n", + "ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, multiquery_retriever], weights=[0.5, 0.5])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Final Implementation" + ] }, { "cell_type": "code", - "execution_count": 422, + "execution_count": 568, "metadata": {}, "outputs": [], "source": [ @@ -1471,28 +1515,15 @@ "\n", "model = ChatOpenAI(model_name=\"gpt-4\", temperature=0)\n", "\n", - "qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(model, chain_type=\"stuff\", retriever=vectorstore_with_sources.as_retriever(), return_source_documents=True)\n", - "\n", - "\n", - "def run_qa_with_sources(question, use_rephrased_questions=False):\n", + "qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(model, chain_type=\"stuff\", retriever=ensemble_retriever, return_source_documents=True)\n", "\n", - " rephrased_question = None\n", "\n", - " if not use_rephrased_questions:\n", - " result = qa_with_sources({\"question\": question}, return_only_outputs=True)\n", - " else:\n", + "def run_qa_with_sources(question):\n", " \n", - " rephrased_questions = generate_rephrased_questions(question)\n", - "\n", - " # Attempt each question until a valid result is found\n", - " for q in rephrased_questions:\n", - " result = qa_with_sources({\"question\": q}, return_only_outputs=True) \n", - " # If the model is unable to find an answer, it 
returns 'sorry' in the response, we try again with a different question\n", - " if 'sorry' in result['answer'].lower():\n", - " continue\n", - " else:\n", - " rephrased_question = q\n", - " break\n", + " # Santize the question by removing any trailing question marks\n", + " sanitized_question = question.rstrip(\"?\")\n", + "\n", + " result = qa_with_sources({\"question\": sanitized_question}, return_only_outputs=True)\n", "\n", " answer = result['answer']\n", " source_ids = result['sources']\n", @@ -1505,14 +1536,13 @@ " url = metadata['url']\n", " if source_id in source_ids:\n", " source_urls.add(url)\n", - " return dict(answer=answer, source_urls=source_urls, rephrased_question=rephrased_question, source_docs=source_docs)\n", + " return dict(answer=answer, source_urls=source_urls, source_docs=source_docs)\n", "\n", "def ask_with_sources(question, use_rephrased_questions=False):\n", " result = run_qa_with_sources(question, use_rephrased_questions)\n", "\n", " display(Markdown(f\"### Question\"))\n", " display(Markdown(\"ORIGINAL: \" + question))\n", - " display(Markdown(\"REPHRASED: \" + f\"{result['rephrased_question'] if 'rephrased_question' in result else 'None'}\"))\n", "\n", " display(Markdown(f\"### Answer\"))\n", " display(Markdown(result[\"answer\"]))\n", @@ -1523,35 +1553,7 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Questions that were answered incorrectly by the Mava bot as per emoji reaction in the test channel\n", - "MAVA_MISANSWERED_QUES = [\n", - " \"what's a scout?\",\n", - " \"Am I allowed to use AI in an audit?\",\n", - " \"Can I change my Code4rena username?\",\n", - " \"How do I book a solo audit?\",\n", - " \"Do I need to be certified to participate in an audit?\",\n", - " \"How do bot races work?\",\n", - " \"Can I change my Code4rena profile name?\",\n", - " \"What are scout awards?\",\n", - " \"What are analysis reports?\",\n", - " \"what is an analysis finding?\",\n", - " 
\"My name wasn't in the award announcements. When can I check on my results?\",\n", - " \"How long does the certification process take?\",\n", - " \"How can I access findings.csv?\",\n", - " \"Can I use chatgpt?\"\n", - "]\n", - "\n", - "for q in MAVA_MISANSWERED_QUES:\n", - " ask_with_sources(q, use_rephrased_questions=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 425, + "execution_count": 547, "metadata": {}, "outputs": [], "source": [ @@ -1591,14 +1593,14 @@ }, { "cell_type": "code", - "execution_count": 426, + "execution_count": 548, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Bot Accuracy: 0.7777777777777778\n" + "Bot Accuracy: 0.8888888888888888\n" ] }, { @@ -1634,15 +1636,15 @@ " 0\n", " Hi, how can I get backstage access?\n", " To get backstage access, you need to become a ...\n", - " The documents provided do not contain informat...\n", - " Incorrect\n", + " To get backstage access, you need to become a ...\n", + " Correct\n", " Incorrect\n", " \n", " \n", " 1\n", " how long does it take until findings are relea...\n", " Based on the context provided, the findings fr...\n", - " The findings from the audit are typically rele...\n", + " The findings are released between 42 to 60 day...\n", " Correct\n", " Correct\n", " \n", @@ -1650,15 +1652,15 @@ " 2\n", " When can I talk about findings?\n", " You can talk about your findings after the con...\n", - " You can talk about findings after they have be...\n", - " Incorrect\n", + " You can talk about findings after the findings...\n", " Incorrect\n", + " Correct\n", " \n", " \n", " 3\n", " How do I change my wallet address?\n", " To change your wallet address, follow these st...\n", - " You can change your payment information at any...\n", + " You can change your wallet address by logging ...\n", " Correct\n", " Correct\n", " \n", @@ -1666,8 +1668,8 @@ " 4\n", " What are scouts?\n", " In the context of Code4rena, Scouts are indivi...\n", - " In the 
context of Code4rena, scouts are indivi...\n", - " Correct\n", + " Scouts in the context of Code4rena are individ...\n", + " Incorrect\n", " Correct\n", " \n", " \n", @@ -1682,7 +1684,7 @@ " 6\n", " how does certification work?\n", " The certification process at Code4rena works i...\n", - " Certification works through a process where an...\n", + " Certification works by submitting an applicati...\n", " Correct\n", " Correct\n", " \n", @@ -1698,8 +1700,8 @@ " 8\n", " What is a lookout?\n", " In the context provided, a lookout is a role i...\n", - " In the context of Code4rena, a lookout is a ro...\n", - " Incorrect\n", + " In the context of Code4rena's competitions, a ...\n", + " Correct\n", " Correct\n", " \n", " \n", @@ -1730,29 +1732,29 @@ "8 In the context provided, a lookout is a role i... \n", "\n", " Bot answers \\\n", - "0 The documents provided do not contain informat... \n", - "1 The findings from the audit are typically rele... \n", - "2 You can talk about findings after they have be... \n", - "3 You can change your payment information at any... \n", - "4 In the context of Code4rena, scouts are indivi... \n", + "0 To get backstage access, you need to become a ... \n", + "1 The findings are released between 42 to 60 day... \n", + "2 You can talk about findings after the findings... \n", + "3 You can change your wallet address by logging ... \n", + "4 Scouts in the context of Code4rena are individ... \n", "5 The contest process usually takes between 42 t... \n", - "6 Certification works through a process where an... \n", + "6 Certification works by submitting an applicati... \n", "7 Yes, you can use bots to analyze code. Code4re... \n", - "8 In the context of Code4rena, a lookout is a ro... \n", + "8 In the context of Code4rena's competitions, a ... 
\n", "\n", " Retrieval relevancy score Answer similarity score \n", - "0 Incorrect Incorrect \n", + "0 Correct Incorrect \n", "1 Correct Correct \n", - "2 Incorrect Incorrect \n", + "2 Incorrect Correct \n", "3 Correct Correct \n", - "4 Correct Correct \n", + "4 Incorrect Correct \n", "5 Correct Correct \n", "6 Correct Correct \n", "7 Correct Correct \n", - "8 Incorrect Correct " + "8 Correct Correct " ] }, - "execution_count": 426, + "execution_count": 548, "metadata": {}, "output_type": "execute_result" } @@ -1761,6 +1763,326 @@ "auto_eval()" ] }, + { + "cell_type": "code", + "execution_count": 549, + "metadata": {}, + "outputs": [], + "source": [ + "# Questions that were answered incorrectly by the Mava bot as per emoji reaction in the test channel\n", + "MAVA_MISANSWERED_QUES = [\n", + " \"Am I allowed to use AI in an audit?\",\n", + " \"Can I change my Code4rena username?\",\n", + " \"How do I book a solo audit?\",\n", + " \"Do I need to be certified to participate in an audit?\",\n", + " \"How do bot races work?\",\n", + " \"Can I change my Code4rena profile name?\",\n", + " \"What are scout awards?\",\n", + " \"What are analysis reports?\",\n", + " \"what is an analysis finding?\",\n", + " \"My name wasn't in the award announcements. 
When can I check on my results?\",\n", + " \"How long does the certification process take?\",\n", + " \"How can I access findings.csv?\",\n", + " \"Can I use chatgpt?\"\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 550, + "metadata": {}, + "outputs": [], + "source": [ + "labeled_ques = [d['question'] for d in yaml_data]\n", + "eval_set = labeled_ques + MAVA_MISANSWERED_QUES" + ] + }, + { + "cell_type": "code", + "execution_count": 571, + "metadata": {}, + "outputs": [], + "source": [ + "eval_results = []\n", + "for q in eval_set:\n", + " result = run_qa_with_sources(q)\n", + " eval_results.append(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 577, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
questionBot answersSources
0Hi, how can I get backstage access?To get backstage access, you need to meet the ...https://github.com/code-423n4/docs/blob/main//...
1how long does it take until findings are relea...The findings are released between 42 to 60 day...https://github.com/code-423n4/docs/blob/main//...
2When can I talk about findings?You can talk about findings after they are mad...https://github.com/code-423n4/docs/blob/main//...
3How do I change my wallet address?You can change your wallet address by logging ...https://github.com/code-423n4/docs/blob/main//...
4What are scouts?Scouts in the context of Code4rena focus on sc...https://code4rena.com/how-it-works, https://gi...
5How long does the contest process usually take?The contest process usually takes between 42 t...https://github.com/code-423n4/docs/blob/main//...
6how does certification work?Certification works through a process where an...https://github.com/code-423n4/docs/blob/main//...
7Can I use bots to analyze code?Yes, you can use bots to analyze code. Code4re...https://code4rena.com/how-it-works, https://co...
8What is a lookout?In the context of Code4rena, a lookout is a ro...https://github.com/code-423n4/docs/blob/main//...
9what's a scout?In the context of Code4rena, a Scout is a role...https://github.com/code-423n4/docs/blob/main//...
10Am I allowed to use AI in an audit?Yes, you are allowed to use AI in an audit. Ho...https://github.com/code-423n4/docs/blob/main//...
11Can I change my Code4rena username?No, you cannot change your Code4rena username....https://code4rena.com/register
12How do I book a solo audit?To book a solo audit, a project team member sh...https://github.com/code-423n4/docs/blob/main//...
13Do I need to be certified to participate in an...Yes, you need to be a certified contributor to...https://github.com/code-423n4/docs/blob/main//...
14How do bot races work?Bot races work in two stages. In the first sta...https://code4rena.com/how-it-works, https://co...
15Can I change my Code4rena profile name?The documents do not provide information on wh...https://code4rena.com/register, https://github...
16What are scout awards?Scout awards are part of the incentive model u...https://github.com/code-423n4/docs/blob/main//...
17What are analysis reports?Analysis reports are written submissions that ...https://github.com/code-423n4/docs/blob/main//...
18what is an analysis finding?An analysis finding is a written submission th...https://github.com/code-423n4/docs/blob/main//...
19My name wasn't in the award announcements. Whe...You can confirm that Code4rena has received yo...https://github.com/code-423n4/docs/blob/main//...
20How long does the certification process take?Once you submit the certified contributor appl...https://github.com/code-423n4/docs/blob/main//...
21How can I access findings.csv?To access findings.csv, you need to sign into ...https://github.com/code-423n4/docs/blob/main//...
22Can I use chatgpt?The use of ChatGPT or similar automated tools ...https://github.com/code-423n4/docs/blob/main//...
\n", + "
" + ], + "text/plain": [ + " question \\\n", + "0 Hi, how can I get backstage access? \n", + "1 how long does it take until findings are relea... \n", + "2 When can I talk about findings? \n", + "3 How do I change my wallet address? \n", + "4 What are scouts? \n", + "5 How long does the contest process usually take? \n", + "6 how does certification work? \n", + "7 Can I use bots to analyze code? \n", + "8 What is a lookout? \n", + "9 what's a scout? \n", + "10 Am I allowed to use AI in an audit? \n", + "11 Can I change my Code4rena username? \n", + "12 How do I book a solo audit? \n", + "13 Do I need to be certified to participate in an... \n", + "14 How do bot races work? \n", + "15 Can I change my Code4rena profile name? \n", + "16 What are scout awards? \n", + "17 What are analysis reports? \n", + "18 what is an analysis finding? \n", + "19 My name wasn't in the award announcements. Whe... \n", + "20 How long does the certification process take? \n", + "21 How can I access findings.csv? \n", + "22 Can I use chatgpt? \n", + "\n", + " Bot answers \\\n", + "0 To get backstage access, you need to meet the ... \n", + "1 The findings are released between 42 to 60 day... \n", + "2 You can talk about findings after they are mad... \n", + "3 You can change your wallet address by logging ... \n", + "4 Scouts in the context of Code4rena focus on sc... \n", + "5 The contest process usually takes between 42 t... \n", + "6 Certification works through a process where an... \n", + "7 Yes, you can use bots to analyze code. Code4re... \n", + "8 In the context of Code4rena, a lookout is a ro... \n", + "9 In the context of Code4rena, a Scout is a role... \n", + "10 Yes, you are allowed to use AI in an audit. Ho... \n", + "11 No, you cannot change your Code4rena username.... \n", + "12 To book a solo audit, a project team member sh... \n", + "13 Yes, you need to be a certified contributor to... \n", + "14 Bot races work in two stages. In the first sta... 
\n", + "15 The documents do not provide information on wh... \n", + "16 Scout awards are part of the incentive model u... \n", + "17 Analysis reports are written submissions that ... \n", + "18 An analysis finding is a written submission th... \n", + "19 You can confirm that Code4rena has received yo... \n", + "20 Once you submit the certified contributor appl... \n", + "21 To access findings.csv, you need to sign into ... \n", + "22 The use of ChatGPT or similar automated tools ... \n", + "\n", + " Sources \n", + "0 https://github.com/code-423n4/docs/blob/main//... \n", + "1 https://github.com/code-423n4/docs/blob/main//... \n", + "2 https://github.com/code-423n4/docs/blob/main//... \n", + "3 https://github.com/code-423n4/docs/blob/main//... \n", + "4 https://code4rena.com/how-it-works, https://gi... \n", + "5 https://github.com/code-423n4/docs/blob/main//... \n", + "6 https://github.com/code-423n4/docs/blob/main//... \n", + "7 https://code4rena.com/how-it-works, https://co... \n", + "8 https://github.com/code-423n4/docs/blob/main//... \n", + "9 https://github.com/code-423n4/docs/blob/main//... \n", + "10 https://github.com/code-423n4/docs/blob/main//... \n", + "11 https://code4rena.com/register \n", + "12 https://github.com/code-423n4/docs/blob/main//... \n", + "13 https://github.com/code-423n4/docs/blob/main//... \n", + "14 https://code4rena.com/how-it-works, https://co... \n", + "15 https://code4rena.com/register, https://github... \n", + "16 https://github.com/code-423n4/docs/blob/main//... \n", + "17 https://github.com/code-423n4/docs/blob/main//... \n", + "18 https://github.com/code-423n4/docs/blob/main//... \n", + "19 https://github.com/code-423n4/docs/blob/main//... \n", + "20 https://github.com/code-423n4/docs/blob/main//... \n", + "21 https://github.com/code-423n4/docs/blob/main//... \n", + "22 https://github.com/code-423n4/docs/blob/main//... 
" + ] + }, + "execution_count": 577, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame({\n", + " \"question\": [q for q in eval_set],\n", + " \"Bot answers\": [r['answer'] for r in eval_results],\n", + " \"Sources\": [ \", \".join(r['source_urls']) for r in eval_results],\n", + "})\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 578, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_excel(\"./outputs/eval_results.xlsx\", index=False)" + ] + }, { "cell_type": "code", "execution_count": null, From d9715eff133d5ce1dfe9b63456785a75cb554669 Mon Sep 17 00:00:00 2001 From: Sagar Shah Date: Thu, 14 Sep 2023 20:02:33 -0500 Subject: [PATCH 09/11] wip --- qa_bot/qa_bot.ipynb | 603 +++++++------------------------------------- 1 file changed, 93 insertions(+), 510 deletions(-) diff --git a/qa_bot/qa_bot.ipynb b/qa_bot/qa_bot.ipynb index a988abe..5c60a1e 100644 --- a/qa_bot/qa_bot.ipynb +++ b/qa_bot/qa_bot.ipynb @@ -35,422 +35,9 @@ }, { "cell_type": "code", - "execution_count": 497, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: langchain[llms] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.0.278)\n", - "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.0.20)\n", - "Requirement already satisfied: PyYAML>=5.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (6.0.1)\n", - "Requirement already satisfied: langsmith<0.1.0,>=0.0.21 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.31)\n", - "Requirement already satisfied: pydantic<3,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages 
(from langchain[llms]) (1.10.12)\n", - "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.0.3)\n", - "Requirement already satisfied: requests<3,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.31.0)\n", - "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (8.2.3)\n", - "Requirement already satisfied: numpy<2,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.25.2)\n", - "Requirement already satisfied: numexpr<3.0.0,>=2.8.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.8.5)\n", - "Requirement already satisfied: dataclasses-json<0.6.0,>=0.5.7 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.5.14)\n", - "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (3.8.5)\n", - "Requirement already satisfied: openai<1,>=0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.28.0)\n", - "Requirement already satisfied: torch<3,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.0.1)\n", - "Requirement already satisfied: openlm<0.0.6,>=0.0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.5)\n", - "Requirement already satisfied: cohere<5,>=4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.21)\n", - "Requirement 
already satisfied: manifest-ml<0.0.2,>=0.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.1)\n", - "Requirement already satisfied: nlpcloud<2,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.1.44)\n", - "Requirement already satisfied: clarifai>=9.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (9.7.6)\n", - "Requirement already satisfied: huggingface_hub<1,>=0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.16.4)\n", - "Requirement already satisfied: transformers<5,>=4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.32.1)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.3.1)\n", - "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (3.2.0)\n", - "Requirement already satisfied: attrs>=17.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (23.1.0)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.9.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.4.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (6.0.4)\n", - "Requirement already satisfied: clarifai-grpc>=9.7.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (9.7.6)\n", - "Requirement already satisfied: rich==13.4.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (13.4.2)\n", - "Requirement already satisfied: tritonclient==2.34.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (2.34.0)\n", - "Collecting tqdm==4.64.1\n", - " Using cached tqdm-4.64.1-py2.py3-none-any.whl (78 kB)\n", - "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (23.1)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (2.16.1)\n", - "Requirement already satisfied: python-rapidjson>=0.9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tritonclient==2.34.0->clarifai>=9.1.0->langchain[llms]) (1.10)\n", - "Requirement already satisfied: importlib_metadata<7.0,>=6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (6.8.0)\n", - "Requirement already satisfied: urllib3<3,>=1.26 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
cohere<5,>=4->langchain[llms]) (2.0.4)\n", - "Requirement already satisfied: backoff<3.0,>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (2.2.1)\n", - "Requirement already satisfied: fastavro==1.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (1.8.2)\n", - "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (0.9.0)\n", - "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (3.20.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (4.7.1)\n", - "Requirement already satisfied: fsspec in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (2023.6.0)\n", - "Requirement already satisfied: filelock in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (3.12.3)\n", - "Requirement already satisfied: dill>=0.3.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (0.3.7)\n", - "Requirement already satisfied: sqlitedict>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (2.1.0)\n", - "Requirement already satisfied: redis>=4.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (5.0.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests<3,>=2->langchain[llms]) (3.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests<3,>=2->langchain[llms]) (2023.7.22)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain[llms]) (2.0.2)\n", - "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (3.1.2)\n", - "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (1.12)\n", - "Requirement already satisfied: networkx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (3.1)\n", - "Requirement already satisfied: regex!=2019.12.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (2023.8.8)\n", - "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (0.13.3)\n", - "Requirement already satisfied: safetensors>=0.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (0.3.3)\n", - "Requirement already satisfied: grpcio>=1.44.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (1.57.0)\n", - "Requirement already 
satisfied: protobuf>=3.20.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (4.24.2)\n", - "Requirement already satisfied: googleapis-common-protos>=1.53.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (1.60.0)\n", - "Requirement already satisfied: zipp>=0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from importlib_metadata<7.0,>=6.0->cohere<5,>=4->langchain[llms]) (3.16.2)\n", - "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (1.0.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jinja2->torch<3,>=1->langchain[llms]) (2.1.3)\n", - "Requirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from sympy->torch<3,>=1->langchain[llms]) (1.3.0)\n", - "Requirement already satisfied: mdurl~=0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (0.1.2)\n", - "Installing collected packages: tqdm\n", - " Attempting uninstall: tqdm\n", - " Found existing installation: tqdm 4.66.1\n", - " Uninstalling tqdm-4.66.1:\n", - " Successfully uninstalled tqdm-4.66.1\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "chromadb 0.4.8 requires tqdm>=4.65.0, but you have tqdm 4.64.1 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed tqdm-4.64.1\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: Scrapy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (2.10.1)\n", - "Requirement already satisfied: itemloaders>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.1.0)\n", - "Requirement already satisfied: cssselect>=0.9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.2.0)\n", - "Requirement already satisfied: setuptools in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (63.2.0)\n", - "Requirement already satisfied: lxml>=4.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (4.9.3)\n", - "Requirement already satisfied: w3lib>=1.17.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (2.1.2)\n", - "Requirement already satisfied: cryptography>=36.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (41.0.3)\n", - "Requirement already satisfied: PyDispatcher>=2.0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (2.0.7)\n", - "Requirement already satisfied: packaging in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.1)\n", - "Requirement already satisfied: zope.interface>=5.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (6.0)\n", - "Requirement already satisfied: pyOpenSSL>=21.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.2.0)\n", - "Requirement already satisfied: service-identity>=18.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.1.0)\n", - "Requirement already satisfied: tldextract in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (3.4.4)\n", - "Requirement already satisfied: protego>=0.1.15 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (0.3.0)\n", - "Requirement already satisfied: queuelib>=1.4.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.6.2)\n", - "Requirement already satisfied: parsel>=1.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.8.1)\n", - "Requirement already satisfied: itemadapter>=0.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (0.8.0)\n", - "Requirement already satisfied: Twisted<23.8.0,>=18.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (22.10.0)\n", - "Requirement already satisfied: cffi>=1.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cryptography>=36.0.0->Scrapy) (1.15.1)\n", - "Requirement already satisfied: jmespath>=0.9.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from itemloaders>=1.0.1->Scrapy) (1.0.1)\n", - "Requirement already 
satisfied: pyasn1-modules in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (0.3.0)\n", - "Requirement already satisfied: pyasn1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (0.5.0)\n", - "Requirement already satisfied: attrs>=19.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (23.1.0)\n", - "Requirement already satisfied: hyperlink>=17.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (21.0.0)\n", - "Requirement already satisfied: constantly>=15.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (15.1.0)\n", - "Requirement already satisfied: typing-extensions>=3.6.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (4.7.1)\n", - "Requirement already satisfied: Automat>=0.8.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (22.10.0)\n", - "Requirement already satisfied: incremental>=21.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (22.10.0)\n", - "Requirement already satisfied: requests>=2.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (2.31.0)\n", - "Requirement already satisfied: filelock>=3.0.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (3.12.3)\n", - "Requirement already satisfied: requests-file>=1.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages 
(from tldextract->Scrapy) (1.5.1)\n", - "Requirement already satisfied: idna in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (3.4)\n", - "Requirement already satisfied: six in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Automat>=0.8.0->Twisted<23.8.0,>=18.9.0->Scrapy) (1.16.0)\n", - "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=36.0.0->Scrapy) (2.21)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (2.0.4)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (3.2.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (2023.7.22)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: html2text in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (2020.1.16)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - 
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: lxml in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (4.9.3)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: python-dotenv in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (1.0.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: unstructured[all-docs] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.10.11)\n", - "Requirement already satisfied: lxml in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (4.9.3)\n", - "Requirement already satisfied: nltk in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.8.1)\n", - "Requirement already satisfied: tabulate in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.9.0)\n", - "Requirement already satisfied: dataclasses-json in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.5.14)\n", - "Requirement already satisfied: python-magic in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.4.27)\n", - "Requirement already satisfied: filetype in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.2.0)\n", - "Requirement already satisfied: chardet in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (5.2.0)\n", - "Requirement already satisfied: requests in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.31.0)\n", - "Requirement already satisfied: emoji in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.8.0)\n", - "Requirement already satisfied: beautifulsoup4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (4.12.2)\n", - "Requirement already satisfied: pypandoc in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.11)\n", - "Requirement already satisfied: pdf2image in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.16.3)\n", - "Requirement already satisfied: Pillow<10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (9.5.0)\n", - "Requirement already satisfied: python-docx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.8.11)\n", - "Requirement already satisfied: pdfminer.six in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (20221105)\n", - "Requirement already satisfied: pandas in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.1.0)\n", - "Requirement already satisfied: python-pptx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.6.22)\n", - "Requirement already satisfied: xlrd in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.0.1)\n", - "Requirement already satisfied: msg-parser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.2.0)\n", - "Requirement already satisfied: openpyxl in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.1.2)\n", - "Requirement already satisfied: markdown in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.4.4)\n", - "Requirement already satisfied: ebooklib in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.18)\n", - "Requirement already satisfied: unstructured-inference in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.5.19)\n", - "Requirement already satisfied: soupsieve>1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from beautifulsoup4->unstructured[all-docs]) (2.4.1)\n", - "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json->unstructured[all-docs]) (0.9.0)\n", - "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json->unstructured[all-docs]) (3.20.1)\n", - "Requirement already satisfied: six in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ebooklib->unstructured[all-docs]) (1.16.0)\n", - "Requirement already satisfied: olefile>=0.46 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from msg-parser->unstructured[all-docs]) (0.46)\n", - "Requirement already satisfied: tqdm in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (4.64.1)\n", - "Requirement already satisfied: regex>=2021.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (2023.8.8)\n", - "Requirement already satisfied: joblib in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (1.3.2)\n", - "Requirement already satisfied: click in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (8.1.7)\n", - "Requirement already satisfied: et-xmlfile in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from openpyxl->unstructured[all-docs]) (1.1.0)\n", - "Requirement already satisfied: tzdata>=2022.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2023.3)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2023.3)\n", - "Requirement already satisfied: numpy>=1.22.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (1.25.2)\n", - "Requirement 
already satisfied: python-dateutil>=2.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2.8.2)\n", - "Requirement already satisfied: charset-normalizer>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pdfminer.six->unstructured[all-docs]) (3.2.0)\n", - "Requirement already satisfied: cryptography>=36.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pdfminer.six->unstructured[all-docs]) (41.0.3)\n", - "Requirement already satisfied: XlsxWriter>=0.5.7 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from python-pptx->unstructured[all-docs]) (3.1.2)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (2.0.4)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (3.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (2023.7.22)\n", - "Requirement already satisfied: huggingface-hub in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.16.4)\n", - "Requirement already satisfied: layoutparser[layoutmodels,tesseract] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.3.4)\n", - "Requirement already satisfied: transformers>=4.25.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (4.32.1)\n", - 
"Requirement already satisfied: onnxruntime in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (1.15.1)\n", - "Requirement already satisfied: python-multipart in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.0.6)\n", - "Requirement already satisfied: opencv-python!=4.7.0.68 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (4.8.0.76)\n", - "Requirement already satisfied: cffi>=1.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cryptography>=36.0.0->pdfminer.six->unstructured[all-docs]) (1.15.1)\n", - "Requirement already satisfied: packaging>=17.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->unstructured[all-docs]) (23.1)\n", - "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (0.13.3)\n", - "Requirement already satisfied: safetensors>=0.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (0.3.3)\n", - "Requirement already satisfied: filelock in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (3.12.3)\n", - "Requirement already satisfied: pyyaml>=5.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (6.0.1)\n", - "Requirement already 
satisfied: fsspec in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface-hub->unstructured-inference->unstructured[all-docs]) (2023.6.0)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface-hub->unstructured-inference->unstructured[all-docs]) (4.7.1)\n", - "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json->unstructured[all-docs]) (1.0.0)\n", - "Requirement already satisfied: scipy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.11.2)\n", - "Requirement already satisfied: pdfplumber in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.10.2)\n", - "Requirement already satisfied: iopath in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.1.10)\n", - "Requirement already satisfied: pytesseract in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.3.10)\n", - "Requirement already satisfied: effdet in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.4.1)\n", - "Requirement already satisfied: torch in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.0.1)\n", - "Requirement already satisfied: torchvision in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.15.2)\n", - "Requirement already satisfied: flatbuffers in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (23.5.26)\n", - "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (1.12)\n", - "Requirement already satisfied: coloredlogs in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (15.0.1)\n", - "Requirement already satisfied: protobuf in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (4.24.2)\n", - "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six->unstructured[all-docs]) (2.21)\n", - "Requirement already satisfied: humanfriendly>=9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from coloredlogs->onnxruntime->unstructured-inference->unstructured[all-docs]) (10.0)\n", - "Requirement already satisfied: timm>=0.9.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.9.6)\n", - "Requirement already satisfied: pycocotools>=2.0.2 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.0.7)\n", - "Requirement already satisfied: omegaconf>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.3.0)\n", - "Requirement already satisfied: networkx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.1)\n", - "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.1.2)\n", - "Requirement already satisfied: portalocker in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from iopath->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.7.0)\n", - "Requirement already satisfied: pypdfium2>=4.18.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pdfplumber->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.19.0)\n", - "Requirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from sympy->onnxruntime->unstructured-inference->unstructured[all-docs]) (1.3.0)\n", - "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from omegaconf>=2.0->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.9.3)\n", - "Requirement already satisfied: matplotlib>=2.1.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.7.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jinja2->torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.1.3)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.42.1)\n", - "Requirement already satisfied: pyparsing<3.1,>=2.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.0.9)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.1.0)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.4.5)\n", - "Requirement already satisfied: cycler>=0.10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.11.0)\n", - "\n", - 
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: tiktoken in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.4.0)\n", - "Requirement already satisfied: requests>=2.26.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tiktoken) (2.31.0)\n", - "Requirement already satisfied: regex>=2022.1.18 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tiktoken) (2023.8.8)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (2.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (2023.7.22)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (3.2.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade 
pip\u001b[0m\n", - "Requirement already satisfied: faiss-cpu in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (1.7.4)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: GitPython in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (3.1.33)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from GitPython) (4.0.10)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from gitdb<5,>=4.0.1->GitPython) (5.0.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: notebook in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (7.0.3)\n", - "Requirement already satisfied: notebook-shim<0.3,>=0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (0.2.3)\n", - "Requirement already satisfied: jupyterlab<5,>=4.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (4.0.5)\n", - "Requirement already satisfied: tornado>=6.2.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (6.3.3)\n", - "Requirement already satisfied: jupyterlab-server<3,>=2.22.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (2.24.0)\n", - "Requirement already satisfied: jupyter-server<3,>=2.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (2.7.3)\n", - "Requirement already satisfied: nbformat>=5.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.9.2)\n", - "Requirement already satisfied: websocket-client in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (1.6.2)\n", - "Requirement already satisfied: terminado>=0.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.17.1)\n", - "Requirement already satisfied: pyzmq>=24 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (25.1.1)\n", - "Requirement already satisfied: prometheus-client in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.17.1)\n", - "Requirement already satisfied: argon2-cffi in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (23.1.0)\n", - "Requirement already satisfied: jupyter-server-terminals in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.4.4)\n", - "Requirement already satisfied: nbconvert>=6.4.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) 
(7.8.0)\n", - "Requirement already satisfied: overrides in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (7.4.0)\n", - "Requirement already satisfied: jupyter-events>=0.6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.7.0)\n", - "Requirement already satisfied: anyio>=3.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (4.0.0)\n", - "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (23.1)\n", - "Requirement already satisfied: traitlets>=5.6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.9.0)\n", - "Requirement already satisfied: jupyter-client>=7.4.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (8.3.1)\n", - "Requirement already satisfied: send2trash>=1.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (1.8.2)\n", - "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.3.1)\n", - "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (3.1.2)\n", - "Requirement already satisfied: async-lru>=1.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.0.4)\n", - "Requirement already satisfied: tomli in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.0.1)\n", - "Requirement already satisfied: jupyter-lsp>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.2.0)\n", - "Requirement already satisfied: ipykernel in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (6.25.1)\n", - "Requirement already satisfied: jsonschema>=4.17.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (4.19.0)\n", - "Requirement already satisfied: json5>=0.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (0.9.14)\n", - "Requirement already satisfied: requests>=2.28 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (2.31.0)\n", - "Requirement already satisfied: babel>=2.10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (2.12.1)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (1.1.3)\n", - "Requirement already satisfied: idna>=2.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (3.4)\n", - "Requirement already satisfied: sniffio>=1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (1.3.0)\n", - "Requirement already satisfied: typing-extensions>=4.0.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from async-lru>=1.0.0->jupyterlab<5,>=4.0.2->notebook) (4.7.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jinja2->jupyter-server<3,>=2.4.0->notebook) (2.1.3)\n", - "Requirement already satisfied: referencing>=0.28.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (0.30.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (23.1.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (2023.7.1)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (0.10.2)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-client>=7.4.4->jupyter-server<3,>=2.4.0->notebook) (2.8.2)\n", - "Requirement already satisfied: platformdirs>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-core!=5.0.*,>=4.12->jupyter-server<3,>=2.4.0->notebook) (3.10.0)\n", - "Requirement already satisfied: rfc3339-validator in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (0.1.4)\n", - "Requirement already satisfied: python-json-logger>=2.0.4 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (2.0.7)\n", - "Requirement already satisfied: rfc3986-validator>=0.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (0.1.1)\n", - "Requirement already satisfied: pyyaml>=5.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (6.0.1)\n", - "Requirement already satisfied: defusedxml in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.7.1)\n", - "Requirement already satisfied: nbclient>=0.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.8.0)\n", - "Requirement already satisfied: tinycss2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.2.1)\n", - "Requirement already satisfied: pandocfilters>=1.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.5.0)\n", - "Requirement already satisfied: jupyterlab-pygments in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.2.2)\n", - "Requirement already satisfied: beautifulsoup4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (4.12.2)\n", - "Requirement already satisfied: pygments>=2.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (2.16.1)\n", - "Requirement already satisfied: bleach!=5.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (6.0.0)\n", - "Requirement already satisfied: mistune<4,>=2.0.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (3.0.1)\n", - "Requirement already satisfied: fastjsonschema in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbformat>=5.3.0->jupyter-server<3,>=2.4.0->notebook) (2.18.0)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (2.0.4)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (3.2.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (2023.7.22)\n", - "Requirement already satisfied: ptyprocess in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from terminado>=0.8.3->jupyter-server<3,>=2.4.0->notebook) (0.7.0)\n", - "Requirement already satisfied: argon2-cffi-bindings in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (21.2.0)\n", - "Requirement already satisfied: matplotlib-inline>=0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.6)\n", - "Requirement already 
satisfied: appnope in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.3)\n", - "Requirement already satisfied: debugpy>=1.6.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.6.7.post1)\n", - "Requirement already satisfied: ipython>=7.23.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (8.14.0)\n", - "Requirement already satisfied: comm>=0.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.4)\n", - "Requirement already satisfied: psutil in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (5.9.5)\n", - "Requirement already satisfied: nest-asyncio in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.5.7)\n", - "Requirement already satisfied: six>=1.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.16.0)\n", - "Requirement already satisfied: webencodings in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.5.1)\n", - "Requirement already satisfied: backcall in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.0)\n", - "Requirement already satisfied: pickleshare in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.7.5)\n", - "Requirement already satisfied: stack-data in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.6.2)\n", - "Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (3.0.39)\n", - "Requirement already satisfied: jedi>=0.16 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.19.0)\n", - "Requirement already satisfied: pexpect>4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (4.8.0)\n", - "Requirement already satisfied: decorator in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (5.1.1)\n", - "Requirement already satisfied: isoduration in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (20.11.0)\n", - "Requirement already satisfied: webcolors>=1.11 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.13)\n", - "Requirement already satisfied: jsonpointer>1.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (2.4)\n", - "Requirement already satisfied: fqdn in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) 
(1.5.1)\n", - "Requirement already satisfied: uri-template in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.3.0)\n", - "Requirement already satisfied: cffi>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (1.15.1)\n", - "Requirement already satisfied: soupsieve>1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from beautifulsoup4->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (2.4.1)\n", - "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (2.21)\n", - "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.8.3)\n", - "Requirement already satisfied: wcwidth in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.6)\n", - "Requirement already satisfied: arrow>=0.15.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from isoduration->jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.2.3)\n", - "Requirement already satisfied: asttokens>=2.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (2.2.1)\n", - "Requirement already satisfied: pure-eval in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.2)\n", - "Requirement already satisfied: executing>=1.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.2.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: chromadb in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.4.8)\n", - "Requirement already satisfied: pydantic<2.0,>=1.9 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.10.12)\n", - "Requirement already satisfied: requests>=2.28 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (2.31.0)\n", - "Requirement already satisfied: pypika>=0.48.9 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.48.9)\n", - "Requirement already satisfied: pulsar-client>=3.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (3.3.0)\n", - "Requirement already satisfied: numpy>=1.21.6 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.25.2)\n", - "Requirement already satisfied: tokenizers>=0.13.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.13.3)\n", - "Collecting tqdm>=4.65.0\n", - " Using cached tqdm-4.66.1-py3-none-any.whl (78 kB)\n", - "Requirement already satisfied: importlib-resources 
in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (6.0.1)\n", - "Requirement already satisfied: bcrypt>=4.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (4.0.1)\n", - "Requirement already satisfied: uvicorn[standard]>=0.18.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.23.2)\n", - "Requirement already satisfied: fastapi<0.100.0,>=0.95.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.99.1)\n", - "Requirement already satisfied: onnxruntime>=1.14.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.15.1)\n", - "Requirement already satisfied: typing-extensions>=4.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (4.7.1)\n", - "Requirement already satisfied: chroma-hnswlib==0.7.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.7.2)\n", - "Requirement already satisfied: posthog>=2.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (3.0.2)\n", - "Requirement already satisfied: overrides>=7.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (7.4.0)\n", - "Requirement already satisfied: starlette<0.28.0,>=0.27.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from fastapi<0.100.0,>=0.95.2->chromadb) (0.27.0)\n", - "Requirement already satisfied: flatbuffers in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (23.5.26)\n", - "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages 
(from onnxruntime>=1.14.1->chromadb) (1.12)\n", - "Requirement already satisfied: coloredlogs in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (15.0.1)\n", - "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (23.1)\n", - "Requirement already satisfied: protobuf in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (4.24.2)\n", - "Requirement already satisfied: python-dateutil>2.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (2.8.2)\n", - "Requirement already satisfied: backoff>=1.10.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (2.2.1)\n", - "Requirement already satisfied: six>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (1.16.0)\n", - "Requirement already satisfied: monotonic>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (1.6)\n", - "Requirement already satisfied: certifi in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pulsar-client>=3.1.0->chromadb) (2023.7.22)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (2.0.4)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.2.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.4)\n", - "Requirement already satisfied: click>=7.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (8.1.7)\n", - "Requirement already satisfied: h11>=0.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.14.0)\n", - "Requirement already satisfied: python-dotenv>=0.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (1.0.0)\n", - "Requirement already satisfied: pyyaml>=5.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (6.0.1)\n", - "Requirement already satisfied: watchfiles>=0.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.20.0)\n", - "Requirement already satisfied: httptools>=0.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.6.0)\n", - "Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.17.0)\n", - "Requirement already satisfied: websockets>=10.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (11.0.3)\n", - "Requirement already satisfied: anyio<5,>=3.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (4.0.0)\n", - "Requirement already satisfied: humanfriendly>=9.1 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from coloredlogs->onnxruntime>=1.14.1->chromadb) (10.0)\n", - "Requirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from sympy->onnxruntime>=1.14.1->chromadb) (1.3.0)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (1.1.3)\n", - "Requirement already satisfied: sniffio>=1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (1.3.0)\n", - "Installing collected packages: tqdm\n", - " Attempting uninstall: tqdm\n", - " Found existing installation: tqdm 4.64.1\n", - " Uninstalling tqdm-4.64.1:\n", - " Successfully uninstalled tqdm-4.64.1\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "clarifai 9.7.6 requires tqdm==4.64.1, but you have tqdm 4.66.1 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed tqdm-4.66.1\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Requirement already satisfied: pandas in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (2.1.0)\n", - "Requirement already satisfied: tzdata>=2022.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (2023.3)\n", - "Requirement already satisfied: numpy>=1.22.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (1.25.2)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (2023.3)\n", - "Requirement already satisfied: six>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", 
- "Collecting rank_bm25\n", - " Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)\n", - "Requirement already satisfied: numpy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from rank_bm25) (1.25.2)\n", - "Installing collected packages: rank_bm25\n", - "Successfully installed rank_bm25-0.2.2\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "# Install all the third-party packages\n", "\n", @@ -736,91 +323,9 @@ }, { "cell_type": "code", - "execution_count": 271, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['72e47726-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e4782a-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47866-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47898-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e478c0-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e478e8-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47910-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47938-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47960-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47988-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e479b0-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e479d8-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47a00-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47a28-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47a50-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47a78-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47a96-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47abe-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47ae6-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47b0e-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47b36-5246-11ee-8d9d-367dda1ae1c5',\n", - 
" '72e47b5e-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47b86-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47ba4-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47bcc-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47bf4-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47c1c-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47c44-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47c6c-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47c8a-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47cb2-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47cda-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47d02-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47d2a-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47d52-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47d70-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47d98-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47dc0-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47de8-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47e10-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47e38-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47e60-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47e7e-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47ea6-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47ece-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47ef6-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47f1e-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47f3c-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47f64-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47f8c-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47fb4-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e47fdc-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e48004-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e4802c-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e4804a-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e48072-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e4809a-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e480c2-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e480ea-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e48112-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e48130-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e48158-5246-11ee-8d9d-367dda1ae1c5',\n", - " 
'72e48180-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e481a8-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e481d0-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e481f8-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e48220-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e48248-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e48270-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e4828e-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e482b6-5246-11ee-8d9d-367dda1ae1c5',\n", - " '72e482de-5246-11ee-8d9d-367dda1ae1c5']" - ] - }, - "execution_count": 271, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from langchain.vectorstores import FAISS\n", "from langchain.embeddings import OpenAIEmbeddings\n", @@ -930,7 +435,7 @@ }, { "cell_type": "code", - "execution_count": 408, + "execution_count": 584, "metadata": {}, "outputs": [], "source": [ @@ -963,12 +468,7 @@ " result['rephrased_question'] = q\n", " break\n", "\n", - " return result\n", - " \n", - "\n", - "def ask(question, use_rephrased_questions=True):\n", - " result = call_llm(question, use_rephrased_questions)\n", - " display_result(question, result)\n" + " return result\n" ] }, { @@ -1505,7 +1005,7 @@ }, { "cell_type": "code", - "execution_count": 568, + "execution_count": 585, "metadata": {}, "outputs": [], "source": [ @@ -1538,8 +1038,8 @@ " source_urls.add(url)\n", " return dict(answer=answer, source_urls=source_urls, source_docs=source_docs)\n", "\n", - "def ask_with_sources(question, use_rephrased_questions=False):\n", - " result = run_qa_with_sources(question, use_rephrased_questions)\n", + "def ask(question):\n", + " result = run_qa_with_sources(question)\n", "\n", " display(Markdown(f\"### Question\"))\n", " display(Markdown(\"ORIGINAL: \" + question))\n", @@ -1553,7 +1053,7 @@ }, { "cell_type": "code", - "execution_count": 547, + "execution_count": 586, "metadata": {}, "outputs": [], "source": [ @@ -2083,6 +1583,89 @@ "df.to_excel(\"./outputs/eval_results.xlsx\", index=False)" ] }, + { + "cell_type": "code", + 
"execution_count": 588, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "### Question" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "ORIGINAL: My wallet was hacked. What do I do?" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Answer" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "If your wallet was hacked, follow these steps:\n", + "\n", + "1. If you are not logged in and you haven't set up your password yet, click \"Log in\" from the connect dropdown and then click \"forgot password\" to get a password reset link.\n", + "2. Log in with your username and password.\n", + "3. Update your payment addresses from the account page.\n", + "4. Submit a help request through the Help Desk while logged in so that the hacked wallet can be removed from your account.\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### Sources" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://github.com/code-423n4/docs/blob/main//roles/wardens/warden-auth.md\n" + ] + } + ], + "source": [ + "ask(\"My wallet was hacked. 
What do I do?\")" + ] + }, { "cell_type": "code", "execution_count": null, From 374e11479049b76abc4853b5153415ac83a01b13 Mon Sep 17 00:00:00 2001 From: Sagar Shah Date: Fri, 15 Sep 2023 13:38:33 -0500 Subject: [PATCH 10/11] wip --- qa_bot/qa_bot.ipynb | 793 ++++---------------------------------------- 1 file changed, 57 insertions(+), 736 deletions(-) diff --git a/qa_bot/qa_bot.ipynb b/qa_bot/qa_bot.ipynb index 5c60a1e..e335a43 100644 --- a/qa_bot/qa_bot.ipynb +++ b/qa_bot/qa_bot.ipynb @@ -58,20 +58,9 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# General setup - you can specify OPENAI_API_KEY in .env file\n", "\n", @@ -85,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -99,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -119,7 +108,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -184,7 +173,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -238,7 +227,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -261,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -281,19 +270,9 @@ }, { "cell_type": "code", - "execution_count": 465, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "89\n", - "97\n", - "72\n" - ] - } - ], + "outputs": [], "source": [ "from langchain.text_splitter 
import (\n", " RecursiveCharacterTextSplitter,\n", @@ -334,7 +313,8 @@ "# NOTE: At times, OpenAI Embedding service can fail intermittently and return errorneous values such as [NaN], more info: https://github.com/langchain-ai/langchain/pull/7070\n", "\n", "embeddings = OpenAIEmbeddings()\n", - "vectorstore = Chroma(\"vectorstore_1\", embeddings, collection_metadata={\"hnsw:space\": \"cosine\"})\n", + "Chroma(\"vectorstore\").delete_collection()\n", + "vectorstore = Chroma(\"vectorstore\", embeddings, collection_metadata={\"hnsw:space\": \"cosine\"})\n", "\n", "vectorstore.add_documents(website_chunks)\n", "#vectorstore.add_documents(docs_chunks)\n", @@ -361,22 +341,9 @@ }, { "cell_type": "code", - "execution_count": 383, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['What is the meaning of scout awards?',\n", - " 'Can you explain what scout awards are?',\n", - " 'Could you provide a description of scout awards?']" - ] - }, - "execution_count": 383, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from langchain.chains import LLMChain\n", "from langchain.chat_models import ChatOpenAI\n", @@ -416,7 +383,7 @@ }, { "cell_type": "code", - "execution_count": 296, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -435,7 +402,7 @@ }, { "cell_type": "code", - "execution_count": 584, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -481,7 +448,7 @@ }, { "cell_type": "code", - "execution_count": 574, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -498,7 +465,7 @@ }, { "cell_type": "code", - "execution_count": 229, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -556,7 +523,7 @@ }, { "cell_type": "code", - "execution_count": 230, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -600,7 +567,7 @@ }, { "cell_type": "code", - "execution_count": 409, + "execution_count": 
null, "metadata": {}, "outputs": [], "source": [ @@ -614,7 +581,7 @@ }, { "cell_type": "code", - "execution_count": 411, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -625,7 +592,7 @@ }, { "cell_type": "code", - "execution_count": 415, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -640,7 +607,7 @@ }, { "cell_type": "code", - "execution_count": 416, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -649,165 +616,9 @@ }, { "cell_type": "code", - "execution_count": 417, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
questionMava correct answer (True value)Bot answersRetrieval relevancy scoreAnswer similarity score
0Hi, how can I get backstage access?To get backstage access, you need to become a ...To obtain +Backstage access, you need to meet ...CorrectIncorrect
1how long does it take until findings are relea...Based on the context provided, the findings fr...The audit report is published and audit issues...CorrectCorrect
2When can I talk about findings?You can talk about your findings after the con...You can discuss the findings after the audit r...IncorrectCorrect
3How do I change my wallet address?To change your wallet address, follow these st...To update your wallet address, you need to:\\n\\...CorrectCorrect
4What are scouts?In the context of Code4rena, Scouts are indivi...Scouts in the context of Code4rena are individ...CorrectCorrect
5How long does the contest process usually take?Based on the provided context, the contest pro...Most audits typically run for 3-7 days.CorrectIncorrect
6how does certification work?The certification process at Code4rena works i...The certification process is as follows:\\n\\n1....CorrectCorrect
7Can I use bots to analyze code?Yes, you can use bots to analyze code. In fact...Yes, it is possible to utilize bots for code a...CorrectCorrect
8What is a lookout?In the context provided, a lookout is a role i...A Lookout in the context of Code4rena's compet...IncorrectCorrect
\n", - "
" - ], - "text/plain": [ - " question \\\n", - "0 Hi, how can I get backstage access? \n", - "1 how long does it take until findings are relea... \n", - "2 When can I talk about findings? \n", - "3 How do I change my wallet address? \n", - "4 What are scouts? \n", - "5 How long does the contest process usually take? \n", - "6 how does certification work? \n", - "7 Can I use bots to analyze code? \n", - "8 What is a lookout? \n", - "\n", - " Mava correct answer (True value) \\\n", - "0 To get backstage access, you need to become a ... \n", - "1 Based on the context provided, the findings fr... \n", - "2 You can talk about your findings after the con... \n", - "3 To change your wallet address, follow these st... \n", - "4 In the context of Code4rena, Scouts are indivi... \n", - "5 Based on the provided context, the contest pro... \n", - "6 The certification process at Code4rena works i... \n", - "7 Yes, you can use bots to analyze code. In fact... \n", - "8 In the context provided, a lookout is a role i... \n", - "\n", - " Bot answers \\\n", - "0 To obtain +Backstage access, you need to meet ... \n", - "1 The audit report is published and audit issues... \n", - "2 You can discuss the findings after the audit r... \n", - "3 To update your wallet address, you need to:\\n\\... \n", - "4 Scouts in the context of Code4rena are individ... \n", - "5 Most audits typically run for 3-7 days. \n", - "6 The certification process is as follows:\\n\\n1.... \n", - "7 Yes, it is possible to utilize bots for code a... \n", - "8 A Lookout in the context of Code4rena's compet... 
\n", - "\n", - " Retrieval relevancy score Answer similarity score \n", - "0 Correct Incorrect \n", - "1 Correct Correct \n", - "2 Incorrect Correct \n", - "3 Correct Correct \n", - "4 Correct Correct \n", - "5 Correct Incorrect \n", - "6 Correct Correct \n", - "7 Correct Correct \n", - "8 Incorrect Correct " - ] - }, - "execution_count": 417, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import pandas as pd\n", "\n", @@ -837,7 +648,10 @@ "metadata": {}, "outputs": [], "source": [ - "vectorstore_hyde = Chroma(\"store_hyde_1\", embeddings, collection_metadata={\"hnsw:space\": \"cosine\"})\n", + "collection_name = \"vectorstore_hyde\"\n", + "Chroma(collection_name).delete_collection()\n", + "\n", + "vectorstore_hyde = Chroma(collection_name, embeddings, collection_metadata={\"hnsw:space\": \"cosine\"})\n", "vectorstore_hyde.add_documents(website_chunks)\n", "vectorstore_hyde.add_documents(gh_docs_chunks)" ] @@ -884,7 +698,7 @@ }, { "cell_type": "code", - "execution_count": 299, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -911,18 +725,9 @@ }, { "cell_type": "code", - "execution_count": 467, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:chromadb.telemetry.posthog:Anonymized telemetry enabled. 
See https://docs.trychroma.com/telemetry for more information.\n", - "WARNING:langchain.embeddings.openai:Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised APIError: OpenAI API returned an empty embedding.\n" - ] - } - ], + "outputs": [], "source": [ "from langchain.embeddings import OpenAIEmbeddings\n", "from langchain.vectorstores import Chroma\n", @@ -930,17 +735,21 @@ "# NOTE: At times, OpenAI Embedding service can fail intermittently and return errorneous values such as [NaN], more info: https://github.com/langchain-ai/langchain/pull/7070\n", "\n", "embeddings = OpenAIEmbeddings()\n", - "vectorstore_with_sources = Chroma(\"vectorstore_with_sources6\", embeddings, collection_metadata={\"hnsw:space\": \"cosine\"})\n", + "collection_name = \"vectorstore_with_sources\"\n", + "Chroma(collection_name).delete_collection()\n", + "vectorstore_with_sources = Chroma(collection_name, embeddings, collection_metadata={\"hnsw:space\": \"cosine\"})\n", "\n", "for i, d in enumerate(website_chunks):\n", - " d.metadata['source'] = f\"w{i}-pl\"\n", - " vectorstore_with_sources.add_documents([d])\n", + " dd = d.copy()\n", + " dd.metadata['source'] = f\"w{i}-pl\"\n", + " vectorstore_with_sources.add_documents([dd])\n", "\n", "for i, d in enumerate(gh_docs_chunks):\n", - " local_path = d.metadata['source']\n", - " d.metadata['source'] = f\"g{i}-pl\"\n", - " d.metadata['url'] = f\"{local_path.replace(C4_GH_DOCS_STORAGE_DIR, 'https://github.com/code-423n4/docs/blob/main/')}\"\n", - " vectorstore_with_sources.add_documents([d])" + " dd = d.copy()\n", + " local_path = dd.metadata['source']\n", + " dd.metadata['source'] = f\"g{i}-pl\"\n", + " dd.metadata['url'] = f\"{local_path.replace(C4_GH_DOCS_STORAGE_DIR, 'https://github.com/code-423n4/docs/blob/main/')}\"\n", + " vectorstore_with_sources.add_documents([dd])" ] }, { @@ -952,7 +761,7 @@ }, { "cell_type": "code", - "execution_count": 479, + "execution_count": null, "metadata": {}, 
"outputs": [], "source": [ @@ -971,7 +780,7 @@ }, { "cell_type": "code", - "execution_count": 541, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1005,7 +814,7 @@ }, { "cell_type": "code", - "execution_count": 585, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1053,7 +862,7 @@ }, { "cell_type": "code", - "execution_count": 586, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1093,179 +902,16 @@ }, { "cell_type": "code", - "execution_count": 548, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Bot Accuracy: 0.8888888888888888\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
questionMava correct answer (True value)Bot answersRetrieval relevancy scoreAnswer similarity score
0Hi, how can I get backstage access?To get backstage access, you need to become a ...To get backstage access, you need to become a ...CorrectIncorrect
1how long does it take until findings are relea...Based on the context provided, the findings fr...The findings are released between 42 to 60 day...CorrectCorrect
2When can I talk about findings?You can talk about your findings after the con...You can talk about findings after the findings...IncorrectCorrect
3How do I change my wallet address?To change your wallet address, follow these st...You can change your wallet address by logging ...CorrectCorrect
4What are scouts?In the context of Code4rena, Scouts are indivi...Scouts in the context of Code4rena are individ...IncorrectCorrect
5How long does the contest process usually take?Based on the provided context, the contest pro...The contest process usually takes between 42 t...CorrectCorrect
6how does certification work?The certification process at Code4rena works i...Certification works by submitting an applicati...CorrectCorrect
7Can I use bots to analyze code?Yes, you can use bots to analyze code. In fact...Yes, you can use bots to analyze code. Code4re...CorrectCorrect
8What is a lookout?In the context provided, a lookout is a role i...In the context of Code4rena's competitions, a ...CorrectCorrect
\n", - "
" - ], - "text/plain": [ - " question \\\n", - "0 Hi, how can I get backstage access? \n", - "1 how long does it take until findings are relea... \n", - "2 When can I talk about findings? \n", - "3 How do I change my wallet address? \n", - "4 What are scouts? \n", - "5 How long does the contest process usually take? \n", - "6 how does certification work? \n", - "7 Can I use bots to analyze code? \n", - "8 What is a lookout? \n", - "\n", - " Mava correct answer (True value) \\\n", - "0 To get backstage access, you need to become a ... \n", - "1 Based on the context provided, the findings fr... \n", - "2 You can talk about your findings after the con... \n", - "3 To change your wallet address, follow these st... \n", - "4 In the context of Code4rena, Scouts are indivi... \n", - "5 Based on the provided context, the contest pro... \n", - "6 The certification process at Code4rena works i... \n", - "7 Yes, you can use bots to analyze code. In fact... \n", - "8 In the context provided, a lookout is a role i... \n", - "\n", - " Bot answers \\\n", - "0 To get backstage access, you need to become a ... \n", - "1 The findings are released between 42 to 60 day... \n", - "2 You can talk about findings after the findings... \n", - "3 You can change your wallet address by logging ... \n", - "4 Scouts in the context of Code4rena are individ... \n", - "5 The contest process usually takes between 42 t... \n", - "6 Certification works by submitting an applicati... \n", - "7 Yes, you can use bots to analyze code. Code4re... \n", - "8 In the context of Code4rena's competitions, a ... 
\n", - "\n", - " Retrieval relevancy score Answer similarity score \n", - "0 Correct Incorrect \n", - "1 Correct Correct \n", - "2 Incorrect Correct \n", - "3 Correct Correct \n", - "4 Incorrect Correct \n", - "5 Correct Correct \n", - "6 Correct Correct \n", - "7 Correct Correct \n", - "8 Correct Correct " - ] - }, - "execution_count": 548, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "auto_eval()" ] }, { "cell_type": "code", - "execution_count": 549, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1289,7 +935,7 @@ }, { "cell_type": "code", - "execution_count": 550, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1299,7 +945,7 @@ }, { "cell_type": "code", - "execution_count": 571, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1311,260 +957,9 @@ }, { "cell_type": "code", - "execution_count": 577, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
questionBot answersSources
0Hi, how can I get backstage access?To get backstage access, you need to meet the ...https://github.com/code-423n4/docs/blob/main//...
1how long does it take until findings are relea...The findings are released between 42 to 60 day...https://github.com/code-423n4/docs/blob/main//...
2When can I talk about findings?You can talk about findings after they are mad...https://github.com/code-423n4/docs/blob/main//...
3How do I change my wallet address?You can change your wallet address by logging ...https://github.com/code-423n4/docs/blob/main//...
4What are scouts?Scouts in the context of Code4rena focus on sc...https://code4rena.com/how-it-works, https://gi...
5How long does the contest process usually take?The contest process usually takes between 42 t...https://github.com/code-423n4/docs/blob/main//...
6how does certification work?Certification works through a process where an...https://github.com/code-423n4/docs/blob/main//...
7Can I use bots to analyze code?Yes, you can use bots to analyze code. Code4re...https://code4rena.com/how-it-works, https://co...
8What is a lookout?In the context of Code4rena, a lookout is a ro...https://github.com/code-423n4/docs/blob/main//...
9what's a scout?In the context of Code4rena, a Scout is a role...https://github.com/code-423n4/docs/blob/main//...
10Am I allowed to use AI in an audit?Yes, you are allowed to use AI in an audit. Ho...https://github.com/code-423n4/docs/blob/main//...
11Can I change my Code4rena username?No, you cannot change your Code4rena username....https://code4rena.com/register
12How do I book a solo audit?To book a solo audit, a project team member sh...https://github.com/code-423n4/docs/blob/main//...
13Do I need to be certified to participate in an...Yes, you need to be a certified contributor to...https://github.com/code-423n4/docs/blob/main//...
14How do bot races work?Bot races work in two stages. In the first sta...https://code4rena.com/how-it-works, https://co...
15Can I change my Code4rena profile name?The documents do not provide information on wh...https://code4rena.com/register, https://github...
16What are scout awards?Scout awards are part of the incentive model u...https://github.com/code-423n4/docs/blob/main//...
17What are analysis reports?Analysis reports are written submissions that ...https://github.com/code-423n4/docs/blob/main//...
18what is an analysis finding?An analysis finding is a written submission th...https://github.com/code-423n4/docs/blob/main//...
19My name wasn't in the award announcements. Whe...You can confirm that Code4rena has received yo...https://github.com/code-423n4/docs/blob/main//...
20How long does the certification process take?Once you submit the certified contributor appl...https://github.com/code-423n4/docs/blob/main//...
21How can I access findings.csv?To access findings.csv, you need to sign into ...https://github.com/code-423n4/docs/blob/main//...
22Can I use chatgpt?The use of ChatGPT or similar automated tools ...https://github.com/code-423n4/docs/blob/main//...
\n", - "
" - ], - "text/plain": [ - " question \\\n", - "0 Hi, how can I get backstage access? \n", - "1 how long does it take until findings are relea... \n", - "2 When can I talk about findings? \n", - "3 How do I change my wallet address? \n", - "4 What are scouts? \n", - "5 How long does the contest process usually take? \n", - "6 how does certification work? \n", - "7 Can I use bots to analyze code? \n", - "8 What is a lookout? \n", - "9 what's a scout? \n", - "10 Am I allowed to use AI in an audit? \n", - "11 Can I change my Code4rena username? \n", - "12 How do I book a solo audit? \n", - "13 Do I need to be certified to participate in an... \n", - "14 How do bot races work? \n", - "15 Can I change my Code4rena profile name? \n", - "16 What are scout awards? \n", - "17 What are analysis reports? \n", - "18 what is an analysis finding? \n", - "19 My name wasn't in the award announcements. Whe... \n", - "20 How long does the certification process take? \n", - "21 How can I access findings.csv? \n", - "22 Can I use chatgpt? \n", - "\n", - " Bot answers \\\n", - "0 To get backstage access, you need to meet the ... \n", - "1 The findings are released between 42 to 60 day... \n", - "2 You can talk about findings after they are mad... \n", - "3 You can change your wallet address by logging ... \n", - "4 Scouts in the context of Code4rena focus on sc... \n", - "5 The contest process usually takes between 42 t... \n", - "6 Certification works through a process where an... \n", - "7 Yes, you can use bots to analyze code. Code4re... \n", - "8 In the context of Code4rena, a lookout is a ro... \n", - "9 In the context of Code4rena, a Scout is a role... \n", - "10 Yes, you are allowed to use AI in an audit. Ho... \n", - "11 No, you cannot change your Code4rena username.... \n", - "12 To book a solo audit, a project team member sh... \n", - "13 Yes, you need to be a certified contributor to... \n", - "14 Bot races work in two stages. In the first sta... 
\n", - "15 The documents do not provide information on wh... \n", - "16 Scout awards are part of the incentive model u... \n", - "17 Analysis reports are written submissions that ... \n", - "18 An analysis finding is a written submission th... \n", - "19 You can confirm that Code4rena has received yo... \n", - "20 Once you submit the certified contributor appl... \n", - "21 To access findings.csv, you need to sign into ... \n", - "22 The use of ChatGPT or similar automated tools ... \n", - "\n", - " Sources \n", - "0 https://github.com/code-423n4/docs/blob/main//... \n", - "1 https://github.com/code-423n4/docs/blob/main//... \n", - "2 https://github.com/code-423n4/docs/blob/main//... \n", - "3 https://github.com/code-423n4/docs/blob/main//... \n", - "4 https://code4rena.com/how-it-works, https://gi... \n", - "5 https://github.com/code-423n4/docs/blob/main//... \n", - "6 https://github.com/code-423n4/docs/blob/main//... \n", - "7 https://code4rena.com/how-it-works, https://co... \n", - "8 https://github.com/code-423n4/docs/blob/main//... \n", - "9 https://github.com/code-423n4/docs/blob/main//... \n", - "10 https://github.com/code-423n4/docs/blob/main//... \n", - "11 https://code4rena.com/register \n", - "12 https://github.com/code-423n4/docs/blob/main//... \n", - "13 https://github.com/code-423n4/docs/blob/main//... \n", - "14 https://code4rena.com/how-it-works, https://co... \n", - "15 https://code4rena.com/register, https://github... \n", - "16 https://github.com/code-423n4/docs/blob/main//... \n", - "17 https://github.com/code-423n4/docs/blob/main//... \n", - "18 https://github.com/code-423n4/docs/blob/main//... \n", - "19 https://github.com/code-423n4/docs/blob/main//... \n", - "20 https://github.com/code-423n4/docs/blob/main//... \n", - "21 https://github.com/code-423n4/docs/blob/main//... \n", - "22 https://github.com/code-423n4/docs/blob/main//... 
" - ] - }, - "execution_count": 577, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = pd.DataFrame({\n", " \"question\": [q for q in eval_set],\n", @@ -1576,7 +971,7 @@ }, { "cell_type": "code", - "execution_count": 578, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1585,83 +980,9 @@ }, { "cell_type": "code", - "execution_count": 588, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "### Question" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "ORIGINAL: My wallet was hacked. What do I do?" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Answer" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "If your wallet was hacked, follow these steps:\n", - "\n", - "1. If you are not logged in and you haven't set up your password yet, click \"Log in\" from the connect dropdown and then click \"forgot password\" to get a password reset link.\n", - "2. Log in with your username and password.\n", - "3. Update your payment addresses from the account page.\n", - "4. Submit a help request through the Help Desk while logged in so that the hacked wallet can be removed from your account.\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "### Sources" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://github.com/code-423n4/docs/blob/main//roles/wardens/warden-auth.md\n" - ] - } - ], + "outputs": [], "source": [ "ask(\"My wallet was hacked. 
What do I do?\")" ] From e7f27462ee3cc08970bb7d33af209f5a295bc782 Mon Sep 17 00:00:00 2001 From: Sagar Shah Date: Thu, 5 Oct 2023 11:11:12 -0500 Subject: [PATCH 11/11] Added notebook to populate Weaviate DB with C4 doc embeddings (#8) --- .gitignore | 2 +- qa_bot/notebooks/c4_weaviate_upload.ipynb | 1249 ++++++++++++++++ qa_bot/notebooks/experiment_c4_qa_bot.ipynb | 1463 +++++++++++++++++++ qa_bot/qa_bot.ipynb | 1020 ------------- 4 files changed, 2713 insertions(+), 1021 deletions(-) create mode 100644 qa_bot/notebooks/c4_weaviate_upload.ipynb create mode 100644 qa_bot/notebooks/experiment_c4_qa_bot.ipynb delete mode 100644 qa_bot/qa_bot.ipynb diff --git a/.gitignore b/.gitignore index 6c4d9ee..1246733 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,4 @@ .env .ipynb_checkpoints .DS_Store -outputs/*.xlsx \ No newline at end of file +outputs/*.xlsx \ No newline at end of file diff --git a/qa_bot/notebooks/c4_weaviate_upload.ipynb b/qa_bot/notebooks/c4_weaviate_upload.ipynb new file mode 100644 index 0000000..96bd6e7 --- /dev/null +++ b/qa_bot/notebooks/c4_weaviate_upload.ipynb @@ -0,0 +1,1249 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting langchain[llms]\n", + " Downloading langchain-0.0.302-py3-none-any.whl (1.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting jsonpatch<2.0,>=1.33\n", + " Using cached jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n", + "Collecting numpy<2,>=1\n", + " Using cached numpy-1.26.0-cp310-cp310-macosx_10_9_x86_64.whl (20.6 MB)\n", + "Collecting requests<3,>=2\n", + " Using cached requests-2.31.0-py3-none-any.whl (62 kB)\n", + "Collecting PyYAML>=5.3\n", + " Using cached PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl (189 kB)\n", + 
"Collecting dataclasses-json<0.7,>=0.5.7\n", + " Downloading dataclasses_json-0.6.1-py3-none-any.whl (27 kB)\n", + "Collecting async-timeout<5.0.0,>=4.0.0\n", + " Using cached async_timeout-4.0.3-py3-none-any.whl (5.7 kB)\n", + "Collecting aiohttp<4.0.0,>=3.8.3\n", + " Using cached aiohttp-3.8.5-cp310-cp310-macosx_10_9_x86_64.whl (365 kB)\n", + "Collecting SQLAlchemy<3,>=1.4\n", + " Using cached SQLAlchemy-2.0.21-cp310-cp310-macosx_10_9_x86_64.whl (2.1 MB)\n", + "Collecting anyio<4.0\n", + " Using cached anyio-3.7.1-py3-none-any.whl (80 kB)\n", + "Collecting langsmith<0.1.0,>=0.0.38\n", + " Using cached langsmith-0.0.40-py3-none-any.whl (39 kB)\n", + "Collecting numexpr<3.0.0,>=2.8.4\n", + " Downloading numexpr-2.8.7-cp310-cp310-macosx_10_9_x86_64.whl (102 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m102.7/102.7 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting tenacity<9.0.0,>=8.1.0\n", + " Using cached tenacity-8.2.3-py3-none-any.whl (24 kB)\n", + "Collecting pydantic<3,>=1\n", + " Downloading pydantic-2.4.1-py3-none-any.whl (395 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m395.3/395.3 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting huggingface_hub<1,>=0\n", + " Downloading huggingface_hub-0.17.3-py3-none-any.whl (295 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.0/295.0 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting transformers<5,>=4\n", + " Downloading transformers-4.33.2-py3-none-any.whl (7.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting 
openai<1,>=0\n", + " Downloading openai-0.28.1-py3-none-any.whl (76 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.0/77.0 kB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting torch<3,>=1\n", + " Using cached torch-2.0.1-cp310-none-macosx_10_9_x86_64.whl (143.4 MB)\n", + "Collecting clarifai>=9.1.0\n", + " Downloading clarifai-9.8.2-py3-none-any.whl (2.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.5/2.5 MB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting cohere<5,>=4\n", + " Downloading cohere-4.27-py3-none-any.whl (47 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.6/47.6 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nlpcloud<2,>=1\n", + " Using cached nlpcloud-1.1.44-py3-none-any.whl (4.4 kB)\n", + "Collecting manifest-ml<0.0.2,>=0.0.1\n", + " Using cached manifest_ml-0.0.1-py2.py3-none-any.whl (42 kB)\n", + "Collecting openlm<0.0.6,>=0.0.5\n", + " Using cached openlm-0.0.5-py3-none-any.whl (10 kB)\n", + "Collecting multidict<7.0,>=4.5\n", + " Using cached multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl (29 kB)\n", + "Collecting aiosignal>=1.1.2\n", + " Using cached aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n", + "Collecting frozenlist>=1.1.1\n", + " Using cached frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl (46 kB)\n", + "Collecting charset-normalizer<4.0,>=2.0\n", + " Using cached charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl (126 kB)\n", + "Collecting yarl<2.0,>=1.0\n", + " Using cached yarl-1.9.2-cp310-cp310-macosx_10_9_x86_64.whl (65 kB)\n", + "Collecting attrs>=17.3.0\n", + " Using cached attrs-23.1.0-py3-none-any.whl (61 kB)\n", + "Collecting sniffio>=1.1\n", + " Using cached sniffio-1.3.0-py3-none-any.whl (10 kB)\n", + 
"Collecting idna>=2.8\n", + " Using cached idna-3.4-py3-none-any.whl (61 kB)\n", + "Requirement already satisfied: exceptiongroup in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from anyio<4.0->langchain[llms]) (1.1.3)\n", + "Collecting tritonclient==2.34.0\n", + " Using cached tritonclient-2.34.0-py3-none-any.whl (94 kB)\n", + "Collecting rich==13.4.2\n", + " Using cached rich-13.4.2-py3-none-any.whl (239 kB)\n", + "Collecting clarifai-grpc>=9.8.1\n", + " Downloading clarifai_grpc-9.8.4-py3-none-any.whl (218 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m218.4/218.4 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting tqdm==4.64.1\n", + " Using cached tqdm-4.64.1-py2.py3-none-any.whl (78 kB)\n", + "Collecting schema==0.7.5\n", + " Downloading schema-0.7.5-py2.py3-none-any.whl (17 kB)\n", + "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (23.1)\n", + "Collecting markdown-it-py>=2.2.0\n", + " Using cached markdown_it_py-3.0.0-py3-none-any.whl (87 kB)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (2.16.1)\n", + "Collecting contextlib2>=0.5.5\n", + " Downloading contextlib2-21.6.0-py2.py3-none-any.whl (13 kB)\n", + "Collecting python-rapidjson>=0.9.1\n", + " Downloading python_rapidjson-1.11-cp310-cp310-macosx_10_9_x86_64.whl (226 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m226.3/226.3 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting urllib3<3,>=1.26\n", + " Using cached urllib3-2.0.5-py3-none-any.whl (123 kB)\n", + "Collecting 
backoff<3.0,>=2.0\n", + " Using cached backoff-2.2.1-py3-none-any.whl (15 kB)\n", + "Collecting fastavro==1.8.2\n", + " Using cached fastavro-1.8.2-cp310-cp310-macosx_11_0_x86_64.whl (521 kB)\n", + "Collecting importlib_metadata<7.0,>=6.0\n", + " Using cached importlib_metadata-6.8.0-py3-none-any.whl (22 kB)\n", + "Collecting marshmallow<4.0.0,>=3.18.0\n", + " Using cached marshmallow-3.20.1-py3-none-any.whl (49 kB)\n", + "Collecting typing-inspect<1,>=0.4.0\n", + " Using cached typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", + "Collecting fsspec\n", + " Downloading fsspec-2023.9.2-py3-none-any.whl (173 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m173.4/173.4 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m-:--:--\u001b[0m\n", + "\u001b[?25hCollecting filelock\n", + " Using cached filelock-3.12.4-py3-none-any.whl (11 kB)\n", + "Collecting typing-extensions>=3.7.4.3\n", + " Using cached typing_extensions-4.8.0-py3-none-any.whl (31 kB)\n", + "Collecting jsonpointer>=1.9\n", + " Using cached jsonpointer-2.4-py2.py3-none-any.whl (7.8 kB)\n", + "Collecting redis>=4.3.1\n", + " Downloading redis-5.0.1-py3-none-any.whl (250 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m250.3/250.3 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting sqlitedict>=2.0.0\n", + " Using cached sqlitedict-2.1.0.tar.gz (21 kB)\n", + " Preparing metadata (setup.py) ... 
\u001b[?25ldone\n", + "\u001b[?25hCollecting dill>=0.3.5\n", + " Using cached dill-0.3.7-py3-none-any.whl (115 kB)\n", + "Collecting annotated-types>=0.4.0\n", + " Using cached annotated_types-0.5.0-py3-none-any.whl (11 kB)\n", + "Collecting pydantic-core==2.10.1\n", + " Downloading pydantic_core-2.10.1-cp310-cp310-macosx_10_7_x86_64.whl (1.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.9/1.9 MB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m:00:01\u001b[0m\n", + "\u001b[?25hCollecting certifi>=2017.4.17\n", + " Using cached certifi-2023.7.22-py3-none-any.whl (158 kB)\n", + "Collecting greenlet!=0.4.17\n", + " Using cached greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl (242 kB)\n", + "Collecting jinja2\n", + " Using cached Jinja2-3.1.2-py3-none-any.whl (133 kB)\n", + "Collecting networkx\n", + " Using cached networkx-3.1-py3-none-any.whl (2.1 MB)\n", + "Collecting sympy\n", + " Using cached sympy-1.12-py3-none-any.whl (5.7 MB)\n", + "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1\n", + " Using cached tokenizers-0.13.3-cp310-cp310-macosx_10_11_x86_64.whl (4.0 MB)\n", + "Collecting safetensors>=0.3.1\n", + " Using cached safetensors-0.3.3-cp310-cp310-macosx_12_0_x86_64.whl (403 kB)\n", + "Collecting regex!=2019.12.17\n", + " Using cached regex-2023.8.8-cp310-cp310-macosx_10_9_x86_64.whl (294 kB)\n", + "Collecting protobuf>=3.20.3\n", + " Downloading protobuf-4.24.3-cp37-abi3-macosx_10_9_universal2.whl (409 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m409.4/409.4 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting grpcio>=1.44.0\n", + " Downloading grpcio-1.58.0-cp310-cp310-macosx_12_0_universal2.whl (9.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.5/9.5 MB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0mm\n", + "\u001b[?25hCollecting googleapis-common-protos>=1.53.0\n", + " Using cached googleapis_common_protos-1.60.0-py2.py3-none-any.whl (227 kB)\n", + "Collecting zipp>=0.5\n", + " Downloading zipp-3.17.0-py3-none-any.whl (7.4 kB)\n", + "Collecting mypy-extensions>=0.3.0\n", + " Using cached mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", + "Collecting MarkupSafe>=2.0\n", + " Using cached MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl (13 kB)\n", + "Collecting mpmath>=0.19\n", + " Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)\n", + "Collecting mdurl~=0.1\n", + " Using cached mdurl-0.1.2-py3-none-any.whl (10.0 kB)\n", + "Installing collected packages: tokenizers, sqlitedict, safetensors, mpmath, zipp, urllib3, typing-extensions, tqdm, tenacity, sympy, sniffio, regex, PyYAML, python-rapidjson, protobuf, numpy, networkx, mypy-extensions, multidict, mdurl, marshmallow, MarkupSafe, jsonpointer, idna, grpcio, greenlet, fsspec, frozenlist, filelock, fastavro, dill, contextlib2, charset-normalizer, certifi, backoff, attrs, async-timeout, annotated-types, yarl, typing-inspect, tritonclient, SQLAlchemy, schema, requests, redis, pydantic-core, numexpr, markdown-it-py, jsonpatch, jinja2, importlib_metadata, googleapis-common-protos, anyio, aiosignal, torch, rich, pydantic, openlm, nlpcloud, manifest-ml, huggingface_hub, dataclasses-json, clarifai-grpc, aiohttp, transformers, openai, langsmith, cohere, clarifai, langchain\n", + "\u001b[33m DEPRECATION: sqlitedict is being installed using the legacy 'setup.py install' method, because it does not have a 'pyproject.toml' and the 'wheel' package is not installed. pip 23.1 will enforce this behaviour change. A possible replacement is to enable the '--use-pep517' option. Discussion can be found at https://github.com/pypa/pip/issues/8559\u001b[0m\u001b[33m\n", + "\u001b[0m Running setup.py install for sqlitedict ... 
\u001b[?25ldone\n", + "\u001b[?25hSuccessfully installed MarkupSafe-2.1.3 PyYAML-6.0.1 SQLAlchemy-2.0.21 aiohttp-3.8.5 aiosignal-1.3.1 annotated-types-0.5.0 anyio-3.7.1 async-timeout-4.0.3 attrs-23.1.0 backoff-2.2.1 certifi-2023.7.22 charset-normalizer-3.2.0 clarifai-9.8.2 clarifai-grpc-9.8.4 cohere-4.27 contextlib2-21.6.0 dataclasses-json-0.6.1 dill-0.3.7 fastavro-1.8.2 filelock-3.12.4 frozenlist-1.4.0 fsspec-2023.9.2 googleapis-common-protos-1.60.0 greenlet-2.0.2 grpcio-1.58.0 huggingface_hub-0.17.3 idna-3.4 importlib_metadata-6.8.0 jinja2-3.1.2 jsonpatch-1.33 jsonpointer-2.4 langchain-0.0.302 langsmith-0.0.40 manifest-ml-0.0.1 markdown-it-py-3.0.0 marshmallow-3.20.1 mdurl-0.1.2 mpmath-1.3.0 multidict-6.0.4 mypy-extensions-1.0.0 networkx-3.1 nlpcloud-1.1.44 numexpr-2.8.7 numpy-1.26.0 openai-0.28.1 openlm-0.0.5 protobuf-4.24.3 pydantic-2.4.1 pydantic-core-2.10.1 python-rapidjson-1.11 redis-5.0.1 regex-2023.8.8 requests-2.31.0 rich-13.4.2 safetensors-0.3.3 schema-0.7.5 sniffio-1.3.0 sqlitedict-2.1.0 sympy-1.12 tenacity-8.2.3 tokenizers-0.13.3 torch-2.0.1 tqdm-4.64.1 transformers-4.33.2 tritonclient-2.34.0 typing-extensions-4.8.0 typing-inspect-0.9.0 urllib3-2.0.5 yarl-1.9.2 zipp-3.17.0\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Collecting Scrapy\n", + " Downloading Scrapy-2.11.0-py2.py3-none-any.whl (286 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m286.4/286.4 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting zope.interface>=5.1.0\n", + " Using cached 
zope.interface-6.0-cp310-cp310-macosx_10_9_x86_64.whl (202 kB)\n", + "Collecting lxml>=4.4.1\n", + " Using cached lxml-4.9.3-cp310-cp310-macosx_11_0_x86_64.whl (4.8 MB)\n", + "Collecting tldextract\n", + " Downloading tldextract-3.6.0-py3-none-any.whl (97 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.4/97.4 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting cryptography>=36.0.0\n", + " Using cached cryptography-41.0.4-cp37-abi3-macosx_10_12_x86_64.whl (2.8 MB)\n", + "Collecting PyDispatcher>=2.0.5\n", + " Using cached PyDispatcher-2.0.7-py3-none-any.whl (12 kB)\n", + "Collecting parsel>=1.5.0\n", + " Using cached parsel-1.8.1-py2.py3-none-any.whl (17 kB)\n", + "Collecting itemadapter>=0.1.0\n", + " Using cached itemadapter-0.8.0-py3-none-any.whl (11 kB)\n", + "Collecting Twisted<23.8.0,>=18.9.0\n", + " Using cached Twisted-22.10.0-py3-none-any.whl (3.1 MB)\n", + "Collecting cssselect>=0.9.1\n", + " Using cached cssselect-1.2.0-py2.py3-none-any.whl (18 kB)\n", + "Collecting protego>=0.1.15\n", + " Using cached Protego-0.3.0-py2.py3-none-any.whl (8.5 kB)\n", + "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from Scrapy) (23.1)\n", + "Collecting pyOpenSSL>=21.0.0\n", + " Using cached pyOpenSSL-23.2.0-py3-none-any.whl (59 kB)\n", + "Collecting w3lib>=1.17.0\n", + " Using cached w3lib-2.1.2-py3-none-any.whl (21 kB)\n", + "Collecting queuelib>=1.4.2\n", + " Using cached queuelib-1.6.2-py2.py3-none-any.whl (13 kB)\n", + "Collecting itemloaders>=1.0.1\n", + " Using cached itemloaders-1.1.0-py3-none-any.whl (11 kB)\n", + "Collecting service-identity>=18.1.0\n", + " Using cached service_identity-23.1.0-py3-none-any.whl (12 kB)\n", + "Requirement already satisfied: setuptools in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from Scrapy) (65.5.0)\n", + 
"Collecting cffi>=1.12\n", + " Using cached cffi-1.15.1-cp310-cp310-macosx_10_9_x86_64.whl (179 kB)\n", + "Collecting jmespath>=0.9.5\n", + " Using cached jmespath-1.0.1-py3-none-any.whl (20 kB)\n", + "Collecting pyasn1-modules\n", + " Using cached pyasn1_modules-0.3.0-py2.py3-none-any.whl (181 kB)\n", + "Requirement already satisfied: attrs>=19.1.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (23.1.0)\n", + "Collecting pyasn1\n", + " Using cached pyasn1-0.5.0-py2.py3-none-any.whl (83 kB)\n", + "Collecting Automat>=0.8.0\n", + " Using cached Automat-22.10.0-py2.py3-none-any.whl (26 kB)\n", + "Collecting incremental>=21.3.0\n", + " Using cached incremental-22.10.0-py2.py3-none-any.whl (16 kB)\n", + "Collecting hyperlink>=17.1.1\n", + " Using cached hyperlink-21.0.0-py2.py3-none-any.whl (74 kB)\n", + "Requirement already satisfied: typing-extensions>=3.6.5 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (4.8.0)\n", + "Collecting constantly>=15.1\n", + " Using cached constantly-15.1.0-py2.py3-none-any.whl (7.9 kB)\n", + "Requirement already satisfied: idna in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from tldextract->Scrapy) (3.4)\n", + "Requirement already satisfied: requests>=2.1.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from tldextract->Scrapy) (2.31.0)\n", + "Requirement already satisfied: filelock>=3.0.8 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from tldextract->Scrapy) (3.12.4)\n", + "Collecting requests-file>=1.4\n", + " Using cached requests_file-1.5.1-py2.py3-none-any.whl (3.7 kB)\n", + "Requirement already satisfied: six in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from 
Automat>=0.8.0->Twisted<23.8.0,>=18.9.0->Scrapy) (1.16.0)\n", + "Collecting pycparser\n", + " Using cached pycparser-2.21-py2.py3-none-any.whl (118 kB)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (2023.7.22)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (3.2.0)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (2.0.5)\n", + "Installing collected packages: PyDispatcher, incremental, constantly, zope.interface, w3lib, queuelib, pycparser, pyasn1, protego, lxml, jmespath, itemadapter, hyperlink, cssselect, Automat, Twisted, requests-file, pyasn1-modules, parsel, cffi, tldextract, itemloaders, cryptography, service-identity, pyOpenSSL, Scrapy\n", + "Successfully installed Automat-22.10.0 PyDispatcher-2.0.7 Scrapy-2.11.0 Twisted-22.10.0 cffi-1.15.1 constantly-15.1.0 cryptography-41.0.4 cssselect-1.2.0 hyperlink-21.0.0 incremental-22.10.0 itemadapter-0.8.0 itemloaders-1.1.0 jmespath-1.0.1 lxml-4.9.3 parsel-1.8.1 protego-0.3.0 pyOpenSSL-23.2.0 pyasn1-0.5.0 pyasn1-modules-0.3.0 pycparser-2.21 queuelib-1.6.2 requests-file-1.5.1 service-identity-23.1.0 tldextract-3.6.0 w3lib-2.1.2 zope.interface-6.0\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Collecting html2text\n", + " Using cached 
html2text-2020.1.16-py3-none-any.whl (32 kB)\n", + "Installing collected packages: html2text\n", + "Successfully installed html2text-2020.1.16\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: lxml in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (4.9.3)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Collecting python-dotenv\n", + " Using cached python_dotenv-1.0.0-py3-none-any.whl (19 kB)\n", + "Installing collected packages: python-dotenv\n", + "Successfully installed python-dotenv-1.0.0\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Collecting unstructured[all-docs]\n", + " Downloading unstructured-0.10.16-py3-none-any.whl (1.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m0m\n", + "\u001b[?25hCollecting 
python-iso639\n", + " Downloading python_iso639-2023.6.15-py3-none-any.whl (275 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m275.1/275.1 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting emoji\n", + " Using cached emoji-2.8.0-py2.py3-none-any.whl (358 kB)\n", + "Requirement already satisfied: dataclasses-json in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from unstructured[all-docs]) (0.6.1)\n", + "Requirement already satisfied: lxml in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from unstructured[all-docs]) (4.9.3)\n", + "Collecting filetype\n", + " Using cached filetype-1.2.0-py2.py3-none-any.whl (19 kB)\n", + "Collecting tabulate\n", + " Using cached tabulate-0.9.0-py3-none-any.whl (35 kB)\n", + "Collecting nltk\n", + " Using cached nltk-3.8.1-py3-none-any.whl (1.5 MB)\n", + "Collecting python-magic\n", + " Using cached python_magic-0.4.27-py2.py3-none-any.whl (13 kB)\n", + "Requirement already satisfied: requests in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from unstructured[all-docs]) (2.31.0)\n", + "Collecting chardet\n", + " Using cached chardet-5.2.0-py3-none-any.whl (199 kB)\n", + "Collecting beautifulsoup4\n", + " Using cached beautifulsoup4-4.12.2-py3-none-any.whl (142 kB)\n", + "Collecting markdown\n", + " Using cached Markdown-3.4.4-py3-none-any.whl (94 kB)\n", + "Collecting msg-parser\n", + " Using cached msg_parser-1.2.0-py2.py3-none-any.whl (101 kB)\n", + "Collecting xlrd\n", + " Using cached xlrd-2.0.1-py2.py3-none-any.whl (96 kB)\n", + "Collecting ebooklib\n", + " Using cached EbookLib-0.18.tar.gz (115 kB)\n", + " Preparing metadata (setup.py) ... 
\u001b[?25ldone\n", + "\u001b[?25hCollecting unstructured-inference\n", + " Downloading unstructured_inference-0.6.3-py3-none-any.whl (58 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.0/59.0 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting python-docx\n", + " Using cached python_docx-0.8.11-py3-none-any.whl\n", + "Collecting unstructured.pytesseract>=0.3.12\n", + " Downloading unstructured.pytesseract-0.3.12-py3-none-any.whl (14 kB)\n", + "Collecting pdfminer.six\n", + " Using cached pdfminer.six-20221105-py3-none-any.whl (5.6 MB)\n", + "Collecting python-pptx<=0.6.21\n", + " Using cached python_pptx-0.6.21-py3-none-any.whl\n", + "Collecting pypandoc\n", + " Using cached pypandoc-1.11-py3-none-any.whl (20 kB)\n", + "Collecting pandas\n", + " Downloading pandas-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl (11.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.7/11.7 MB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting openpyxl\n", + " Using cached openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)\n", + "Collecting pdf2image\n", + " Using cached pdf2image-1.16.3-py3-none-any.whl (11 kB)\n", + "Collecting Pillow>=3.3.2\n", + " Downloading Pillow-10.0.1-cp310-cp310-macosx_10_10_x86_64.whl (3.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.7/3.7 MB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting XlsxWriter>=0.5.7\n", + " Downloading XlsxWriter-3.1.5-py3-none-any.whl (153 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.0/154.0 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: packaging>=21.3 in 
/Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from unstructured.pytesseract>=0.3.12->unstructured[all-docs]) (23.1)\n", + "Collecting soupsieve>1.2\n", + " Using cached soupsieve-2.5-py3-none-any.whl (36 kB)\n", + "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from dataclasses-json->unstructured[all-docs]) (0.9.0)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from dataclasses-json->unstructured[all-docs]) (3.20.1)\n", + "Requirement already satisfied: six in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from ebooklib->unstructured[all-docs]) (1.16.0)\n", + "Collecting olefile>=0.46\n", + " Using cached olefile-0.46-py2.py3-none-any.whl\n", + "Requirement already satisfied: regex>=2021.8.3 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (2023.8.8)\n", + "Requirement already satisfied: tqdm in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (4.64.1)\n", + "Collecting click\n", + " Using cached click-8.1.7-py3-none-any.whl (97 kB)\n", + "Collecting joblib\n", + " Using cached joblib-1.3.2-py3-none-any.whl (302 kB)\n", + "Collecting et-xmlfile\n", + " Using cached et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)\n", + "Collecting tzdata>=2022.1\n", + " Using cached tzdata-2023.3-py2.py3-none-any.whl (341 kB)\n", + "Collecting pytz>=2020.1\n", + " Using cached pytz-2023.3.post1-py2.py3-none-any.whl (502 kB)\n", + "Requirement already satisfied: numpy>=1.22.4 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (1.26.0)\n", + "Requirement already satisfied: 
python-dateutil>=2.8.2 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2.8.2)\n", + "Requirement already satisfied: charset-normalizer>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from pdfminer.six->unstructured[all-docs]) (3.2.0)\n", + "Requirement already satisfied: cryptography>=36.0.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from pdfminer.six->unstructured[all-docs]) (41.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (2023.7.22)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (2.0.5)\n", + "Collecting onnx==1.14.1\n", + " Using cached onnx-1.14.1-cp310-cp310-macosx_10_12_x86_64.whl (13.8 MB)\n", + "Collecting rapidfuzz\n", + " Downloading rapidfuzz-3.3.1-cp310-cp310-macosx_10_9_x86_64.whl (2.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.5/2.5 MB\u001b[0m \u001b[31m13.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting python-multipart\n", + " Using cached python_multipart-0.0.6-py3-none-any.whl (45 kB)\n", + "Requirement already satisfied: transformers>=4.25.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (4.33.2)\n", + "Collecting opencv-python!=4.7.0.68\n", + " Using cached 
opencv_python-4.8.0.76-cp37-abi3-macosx_10_16_x86_64.whl (54.7 MB)\n", + "Collecting layoutparser[layoutmodels,tesseract]\n", + " Using cached layoutparser-0.3.4-py3-none-any.whl (19.2 MB)\n", + "Collecting onnxruntime\n", + " Downloading onnxruntime-1.16.0-cp310-cp310-macosx_10_15_x86_64.whl (6.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.9/6.9 MB\u001b[0m \u001b[31m20.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: huggingface-hub in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.17.3)\n", + "Requirement already satisfied: protobuf>=3.20.2 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from onnx==1.14.1->unstructured-inference->unstructured[all-docs]) (4.24.3)\n", + "Requirement already satisfied: typing-extensions>=3.6.2.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from onnx==1.14.1->unstructured-inference->unstructured[all-docs]) (4.8.0)\n", + "Requirement already satisfied: cffi>=1.12 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from cryptography>=36.0.0->pdfminer.six->unstructured[all-docs]) (1.15.1)\n", + "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (0.13.3)\n", + "Requirement already satisfied: pyyaml>=5.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (6.0.1)\n", + "Requirement already satisfied: filelock in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from 
transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (3.12.4)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (0.3.3)\n", + "Requirement already satisfied: fsspec in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from huggingface-hub->unstructured-inference->unstructured[all-docs]) (2023.9.2)\n", + "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json->unstructured[all-docs]) (1.0.0)\n", + "Collecting scipy\n", + " Using cached scipy-1.11.2-cp310-cp310-macosx_10_9_x86_64.whl (37.2 MB)\n", + "Collecting iopath\n", + " Using cached iopath-0.1.10.tar.gz (42 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25ldone\n", + "\u001b[?25hCollecting pdfplumber\n", + " Using cached pdfplumber-0.10.2-py3-none-any.whl (47 kB)\n", + "Collecting pytesseract\n", + " Using cached pytesseract-0.3.10-py3-none-any.whl (14 kB)\n", + "Requirement already satisfied: torch in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.0.1)\n", + "Collecting torchvision\n", + " Using cached torchvision-0.15.2-cp310-cp310-macosx_10_9_x86_64.whl (1.5 MB)\n", + "Collecting effdet\n", + " Using cached effdet-0.4.1-py3-none-any.whl (112 kB)\n", + "Collecting coloredlogs\n", + " Using cached coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n", + "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (1.12)\n", + "Collecting flatbuffers\n", + " Using cached 
flatbuffers-23.5.26-py2.py3-none-any.whl (26 kB)\n", + "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six->unstructured[all-docs]) (2.21)\n", + "Collecting humanfriendly>=9.1\n", + " Using cached humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n", + "Collecting pycocotools>=2.0.2\n", + " Using cached pycocotools-2.0.7-cp310-cp310-macosx_10_9_universal2.whl (169 kB)\n", + "Collecting timm>=0.9.2\n", + " Using cached timm-0.9.7-py3-none-any.whl (2.2 MB)\n", + "Collecting omegaconf>=2.0\n", + " Using cached omegaconf-2.3.0-py3-none-any.whl (79 kB)\n", + "Requirement already satisfied: networkx in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.1)\n", + "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.1.2)\n", + "Collecting portalocker\n", + " Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)\n", + "Collecting pypdfium2>=4.18.0\n", + " Downloading pypdfium2-4.20.0-py3-none-macosx_10_13_x86_64.whl (3.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.0/3.0 MB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from sympy->onnxruntime->unstructured-inference->unstructured[all-docs]) (1.3.0)\n", + "Collecting antlr4-python3-runtime==4.9.*\n", + " Using cached antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n", + " Preparing metadata (setup.py) ... 
\u001b[?25ldone\n", + "\u001b[?25hCollecting matplotlib>=2.1.0\n", + " Downloading matplotlib-3.8.0-cp310-cp310-macosx_10_12_x86_64.whl (7.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m18.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from jinja2->torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.1.3)\n", + "Collecting kiwisolver>=1.0.1\n", + " Using cached kiwisolver-1.4.5-cp310-cp310-macosx_10_9_x86_64.whl (68 kB)\n", + "Collecting fonttools>=4.22.0\n", + " Using cached fonttools-4.42.1-cp310-cp310-macosx_10_9_x86_64.whl (2.2 MB)\n", + "Collecting pyparsing>=2.3.1\n", + " Downloading pyparsing-3.1.1-py3-none-any.whl (103 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.1/103.1 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting cycler>=0.10\n", + " Using cached cycler-0.11.0-py3-none-any.whl (6.4 kB)\n", + "Collecting contourpy>=1.0.1\n", + " Downloading contourpy-1.1.1-cp310-cp310-macosx_10_9_x86_64.whl (247 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m247.2/247.2 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: pytz, flatbuffers, filetype, antlr4-python3-runtime, XlsxWriter, xlrd, tzdata, tabulate, soupsieve, scipy, rapidfuzz, python-multipart, python-magic, python-iso639, python-docx, pypdfium2, pyparsing, pypandoc, portalocker, Pillow, opencv-python, onnx, omegaconf, olefile, markdown, kiwisolver, joblib, humanfriendly, fonttools, et-xmlfile, emoji, ebooklib, cycler, contourpy, click, chardet, unstructured.pytesseract, python-pptx, pytesseract, 
pdf2image, pandas, openpyxl, nltk, msg-parser, matplotlib, iopath, coloredlogs, beautifulsoup4, unstructured, torchvision, pycocotools, pdfminer.six, onnxruntime, timm, pdfplumber, layoutparser, effdet, unstructured-inference\n", + "\u001b[33m DEPRECATION: antlr4-python3-runtime is being installed using the legacy 'setup.py install' method, because it does not have a 'pyproject.toml' and the 'wheel' package is not installed. pip 23.1 will enforce this behaviour change. A possible replacement is to enable the '--use-pep517' option. Discussion can be found at https://github.com/pypa/pip/issues/8559\u001b[0m\u001b[33m\n", + "\u001b[0m Running setup.py install for antlr4-python3-runtime ... \u001b[?25ldone\n", + "\u001b[?25h\u001b[33m DEPRECATION: ebooklib is being installed using the legacy 'setup.py install' method, because it does not have a 'pyproject.toml' and the 'wheel' package is not installed. pip 23.1 will enforce this behaviour change. A possible replacement is to enable the '--use-pep517' option. Discussion can be found at https://github.com/pypa/pip/issues/8559\u001b[0m\u001b[33m\n", + "\u001b[0m Running setup.py install for ebooklib ... \u001b[?25ldone\n", + "\u001b[?25h\u001b[33m DEPRECATION: iopath is being installed using the legacy 'setup.py install' method, because it does not have a 'pyproject.toml' and the 'wheel' package is not installed. pip 23.1 will enforce this behaviour change. A possible replacement is to enable the '--use-pep517' option. Discussion can be found at https://github.com/pypa/pip/issues/8559\u001b[0m\u001b[33m\n", + "\u001b[0m Running setup.py install for iopath ... 
\u001b[?25ldone\n", + "\u001b[?25hSuccessfully installed Pillow-10.0.1 XlsxWriter-3.1.5 antlr4-python3-runtime-4.9.3 beautifulsoup4-4.12.2 chardet-5.2.0 click-8.1.7 coloredlogs-15.0.1 contourpy-1.1.1 cycler-0.11.0 ebooklib-0.18 effdet-0.4.1 emoji-2.8.0 et-xmlfile-1.1.0 filetype-1.2.0 flatbuffers-23.5.26 fonttools-4.42.1 humanfriendly-10.0 iopath-0.1.10 joblib-1.3.2 kiwisolver-1.4.5 layoutparser-0.3.4 markdown-3.4.4 matplotlib-3.8.0 msg-parser-1.2.0 nltk-3.8.1 olefile-0.46 omegaconf-2.3.0 onnx-1.14.1 onnxruntime-1.16.0 opencv-python-4.8.0.76 openpyxl-3.1.2 pandas-2.1.1 pdf2image-1.16.3 pdfminer.six-20221105 pdfplumber-0.10.2 portalocker-2.8.2 pycocotools-2.0.7 pypandoc-1.11 pyparsing-3.1.1 pypdfium2-4.20.0 pytesseract-0.3.10 python-docx-0.8.11 python-iso639-2023.6.15 python-magic-0.4.27 python-multipart-0.0.6 python-pptx-0.6.21 pytz-2023.3.post1 rapidfuzz-3.3.1 scipy-1.11.2 soupsieve-2.5 tabulate-0.9.0 timm-0.9.7 torchvision-0.15.2 tzdata-2023.3 unstructured-0.10.16 unstructured-inference-0.6.3 unstructured.pytesseract-0.3.12 xlrd-2.0.1\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Collecting tiktoken\n", + " Downloading tiktoken-0.5.1-cp310-cp310-macosx_10_9_x86_64.whl (953 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m953.5/953.5 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: regex>=2022.1.18 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from tiktoken) (2023.8.8)\n", + "Requirement already satisfied: requests>=2.26.0 in 
/Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from tiktoken) (2.31.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (3.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (2023.7.22)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (2.0.5)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (3.2.0)\n", + "Installing collected packages: tiktoken\n", + "Successfully installed tiktoken-0.5.1\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Collecting faiss-cpu\n", + " Using cached faiss_cpu-1.7.4-cp310-cp310-macosx_10_9_x86_64.whl (6.5 MB)\n", + "Installing collected packages: faiss-cpu\n", + "Successfully installed faiss-cpu-1.7.4\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Collecting GitPython\n", + " 
Downloading GitPython-3.1.37-py3-none-any.whl (190 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m190.0/190.0 kB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting gitdb<5,>=4.0.1\n", + " Using cached gitdb-4.0.10-py3-none-any.whl (62 kB)\n", + "Collecting smmap<6,>=3.0.1\n", + " Downloading smmap-5.0.1-py3-none-any.whl (24 kB)\n", + "Installing collected packages: smmap, gitdb, GitPython\n", + "Successfully installed GitPython-3.1.37 gitdb-4.0.10 smmap-5.0.1\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Collecting notebook\n", + " Downloading notebook-7.0.4-py3-none-any.whl (4.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.0/4.0 MB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting jupyterlab-server<3,>=2.22.1\n", + " Downloading jupyterlab_server-2.25.0-py3-none-any.whl (57 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.2/57.2 kB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting jupyter-server<3,>=2.4.0\n", + " Using cached jupyter_server-2.7.3-py3-none-any.whl (375 kB)\n", + "Requirement already satisfied: tornado>=6.2.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from notebook) (6.3.3)\n", + "Collecting jupyterlab<5,>=4.0.2\n", + " Downloading jupyterlab-4.0.6-py3-none-any.whl (9.2 MB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.2/9.2 MB\u001b[0m \u001b[31m15.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting notebook-shim<0.3,>=0.2\n", + " Using cached notebook_shim-0.2.3-py3-none-any.whl (13 kB)\n", + "Requirement already satisfied: traitlets>=5.6.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.10.1)\n", + "Collecting nbconvert>=6.4.4\n", + " Using cached nbconvert-7.8.0-py3-none-any.whl (254 kB)\n", + "Collecting send2trash>=1.8.2\n", + " Using cached Send2Trash-1.8.2-py3-none-any.whl (18 kB)\n", + "Requirement already satisfied: anyio>=3.1.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (3.7.1)\n", + "Collecting websocket-client\n", + " Downloading websocket_client-1.6.3-py3-none-any.whl (57 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.3/57.3 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: jupyter-client>=7.4.4 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (8.3.1)\n", + "Collecting prometheus-client\n", + " Using cached prometheus_client-0.17.1-py3-none-any.whl (60 kB)\n", + "Collecting overrides\n", + " Using cached overrides-7.4.0-py3-none-any.whl (17 kB)\n", + "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.3.1)\n", + "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (23.1)\n", + "Collecting terminado>=0.8.3\n", + " 
Using cached terminado-0.17.1-py3-none-any.whl (17 kB)\n", + "Requirement already satisfied: pyzmq>=24 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (25.1.1)\n", + "Collecting argon2-cffi\n", + " Using cached argon2_cffi-23.1.0-py3-none-any.whl (15 kB)\n", + "Collecting nbformat>=5.3.0\n", + " Using cached nbformat-5.9.2-py3-none-any.whl (77 kB)\n", + "Collecting jupyter-server-terminals\n", + " Using cached jupyter_server_terminals-0.4.4-py3-none-any.whl (13 kB)\n", + "Collecting jupyter-events>=0.6.0\n", + " Using cached jupyter_events-0.7.0-py3-none-any.whl (18 kB)\n", + "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (3.1.2)\n", + "Collecting tomli\n", + " Using cached tomli-2.0.1-py3-none-any.whl (12 kB)\n", + "Collecting jupyter-lsp>=2.0.0\n", + " Using cached jupyter_lsp-2.2.0-py3-none-any.whl (65 kB)\n", + "Collecting async-lru>=1.0.0\n", + " Using cached async_lru-2.0.4-py3-none-any.whl (6.1 kB)\n", + "Requirement already satisfied: ipykernel in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (6.25.2)\n", + "Requirement already satisfied: requests>=2.31 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (2.31.0)\n", + "Collecting json5>=0.9.0\n", + " Using cached json5-0.9.14-py2.py3-none-any.whl (19 kB)\n", + "Collecting jsonschema>=4.18.0\n", + " Downloading jsonschema-4.19.1-py3-none-any.whl (83 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.3/83.3 kB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting babel>=2.10\n", + " Using cached Babel-2.12.1-py3-none-any.whl (10.1 MB)\n", + "Requirement 
already satisfied: idna>=2.8 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (3.4)\n", + "Requirement already satisfied: sniffio>=1.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (1.3.0)\n", + "Requirement already satisfied: exceptiongroup in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (1.1.3)\n", + "Requirement already satisfied: typing-extensions>=4.0.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from async-lru>=1.0.0->jupyterlab<5,>=4.0.2->notebook) (4.8.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from jinja2->jupyter-server<3,>=2.4.0->notebook) (2.1.3)\n", + "Collecting referencing>=0.28.4\n", + " Using cached referencing-0.30.2-py3-none-any.whl (25 kB)\n", + "Collecting rpds-py>=0.7.1\n", + " Downloading rpds_py-0.10.3-cp310-cp310-macosx_10_7_x86_64.whl (333 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m333.0/333.0 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting jsonschema-specifications>=2023.03.6\n", + " Using cached jsonschema_specifications-2023.7.1-py3-none-any.whl (17 kB)\n", + "Requirement already satisfied: attrs>=22.2.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.22.1->notebook) (23.1.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from jupyter-client>=7.4.4->jupyter-server<3,>=2.4.0->notebook) (2.8.2)\n", + 
"Requirement already satisfied: platformdirs>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from jupyter-core!=5.0.*,>=4.12->jupyter-server<3,>=2.4.0->notebook) (3.10.0)\n", + "Requirement already satisfied: pyyaml>=5.3 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (6.0.1)\n", + "Collecting rfc3986-validator>=0.1.1\n", + " Using cached rfc3986_validator-0.1.1-py2.py3-none-any.whl (4.2 kB)\n", + "Collecting rfc3339-validator\n", + " Using cached rfc3339_validator-0.1.4-py2.py3-none-any.whl (3.5 kB)\n", + "Collecting python-json-logger>=2.0.4\n", + " Using cached python_json_logger-2.0.7-py3-none-any.whl (8.1 kB)\n", + "Collecting mistune<4,>=2.0.3\n", + " Using cached mistune-3.0.1-py3-none-any.whl (47 kB)\n", + "Collecting bleach!=5.0.0\n", + " Using cached bleach-6.0.0-py3-none-any.whl (162 kB)\n", + "Requirement already satisfied: pygments>=2.4.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (2.16.1)\n", + "Requirement already satisfied: beautifulsoup4 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (4.12.2)\n", + "Collecting tinycss2\n", + " Using cached tinycss2-1.2.1-py3-none-any.whl (21 kB)\n", + "Collecting pandocfilters>=1.4.1\n", + " Using cached pandocfilters-1.5.0-py2.py3-none-any.whl (8.7 kB)\n", + "Collecting nbclient>=0.5.0\n", + " Using cached nbclient-0.8.0-py3-none-any.whl (73 kB)\n", + "Collecting defusedxml\n", + " Using cached defusedxml-0.7.1-py2.py3-none-any.whl (25 kB)\n", + "Collecting jupyterlab-pygments\n", + " Using cached jupyterlab_pygments-0.2.2-py2.py3-none-any.whl (21 kB)\n", + "Collecting fastjsonschema\n", + " Using cached fastjsonschema-2.18.0-py3-none-any.whl (23 kB)\n", + 
"Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests>=2.31->jupyterlab-server<3,>=2.22.1->notebook) (2.0.5)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests>=2.31->jupyterlab-server<3,>=2.22.1->notebook) (2023.7.22)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests>=2.31->jupyterlab-server<3,>=2.22.1->notebook) (3.2.0)\n", + "Requirement already satisfied: ptyprocess in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from terminado>=0.8.3->jupyter-server<3,>=2.4.0->notebook) (0.7.0)\n", + "Collecting argon2-cffi-bindings\n", + " Using cached argon2_cffi_bindings-21.2.0-cp38-abi3-macosx_10_9_universal2.whl (53 kB)\n", + "Requirement already satisfied: matplotlib-inline>=0.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.6)\n", + "Requirement already satisfied: debugpy>=1.6.5 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.8.0)\n", + "Requirement already satisfied: ipython>=7.23.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (8.15.0)\n", + "Requirement already satisfied: appnope in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.3)\n", + "Requirement already satisfied: nest-asyncio in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.5.8)\n", 
+ "Requirement already satisfied: psutil in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (5.9.5)\n", + "Requirement already satisfied: comm>=0.1.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.4)\n", + "Collecting webencodings\n", + " Using cached webencodings-0.5.1-py2.py3-none-any.whl (11 kB)\n", + "Requirement already satisfied: six>=1.9.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.16.0)\n", + "Requirement already satisfied: jedi>=0.16 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.19.0)\n", + "Requirement already satisfied: backcall in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.0)\n", + "Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (3.0.39)\n", + "Requirement already satisfied: decorator in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (5.1.1)\n", + "Requirement already satisfied: pexpect>4.3 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (4.8.0)\n", + "Requirement already satisfied: stack-data in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from 
ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.6.2)\n", + "Requirement already satisfied: pickleshare in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.7.5)\n", + "Collecting uri-template\n", + " Using cached uri_template-1.3.0-py3-none-any.whl (11 kB)\n", + "Collecting webcolors>=1.11\n", + " Using cached webcolors-1.13-py3-none-any.whl (14 kB)\n", + "Collecting isoduration\n", + " Using cached isoduration-20.11.0-py3-none-any.whl (11 kB)\n", + "Requirement already satisfied: jsonpointer>1.13 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.22.1->notebook) (2.4)\n", + "Collecting fqdn\n", + " Using cached fqdn-1.5.1-py3-none-any.whl (9.1 kB)\n", + "Requirement already satisfied: cffi>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (1.15.1)\n", + "Requirement already satisfied: soupsieve>1.2 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from beautifulsoup4->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (2.5)\n", + "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (2.21)\n", + "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.8.3)\n", + "Requirement already satisfied: wcwidth in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from 
prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.6)\n", + "Collecting arrow>=0.15.0\n", + " Using cached arrow-1.2.3-py3-none-any.whl (66 kB)\n", + "Requirement already satisfied: asttokens>=2.1.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (2.4.0)\n", + "Requirement already satisfied: executing>=1.2.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.2.0)\n", + "Requirement already satisfied: pure-eval in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.2)\n", + "Installing collected packages: webencodings, json5, fastjsonschema, websocket-client, webcolors, uri-template, tomli, tinycss2, terminado, send2trash, rpds-py, rfc3986-validator, rfc3339-validator, python-json-logger, prometheus-client, pandocfilters, overrides, mistune, jupyterlab-pygments, fqdn, defusedxml, bleach, babel, async-lru, referencing, jupyter-server-terminals, arrow, argon2-cffi-bindings, jsonschema-specifications, isoduration, argon2-cffi, jsonschema, nbformat, nbclient, jupyter-events, nbconvert, jupyter-server, notebook-shim, jupyterlab-server, jupyter-lsp, jupyterlab, notebook\n", + "Successfully installed argon2-cffi-23.1.0 argon2-cffi-bindings-21.2.0 arrow-1.2.3 async-lru-2.0.4 babel-2.12.1 bleach-6.0.0 defusedxml-0.7.1 fastjsonschema-2.18.0 fqdn-1.5.1 isoduration-20.11.0 json5-0.9.14 jsonschema-4.19.1 jsonschema-specifications-2023.7.1 jupyter-events-0.7.0 jupyter-lsp-2.2.0 jupyter-server-2.7.3 jupyter-server-terminals-0.4.4 jupyterlab-4.0.6 jupyterlab-pygments-0.2.2 jupyterlab-server-2.25.0 mistune-3.0.1 nbclient-0.8.0 nbconvert-7.8.0 nbformat-5.9.2 notebook-7.0.4 
notebook-shim-0.2.3 overrides-7.4.0 pandocfilters-1.5.0 prometheus-client-0.17.1 python-json-logger-2.0.7 referencing-0.30.2 rfc3339-validator-0.1.4 rfc3986-validator-0.1.1 rpds-py-0.10.3 send2trash-1.8.2 terminado-0.17.1 tinycss2-1.2.1 tomli-2.0.1 uri-template-1.3.0 webcolors-1.13 webencodings-0.5.1 websocket-client-1.6.3\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Collecting chromadb\n", + " Downloading chromadb-0.4.13-py3-none-any.whl (437 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m437.8/437.8 kB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting tqdm>=4.65.0\n", + " Using cached tqdm-4.66.1-py3-none-any.whl (78 kB)\n", + "Requirement already satisfied: pydantic>=1.9 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from chromadb) (2.4.1)\n", + "Collecting pulsar-client>=3.1.0\n", + " Using cached pulsar_client-3.3.0-cp310-cp310-macosx_10_15_universal2.whl (10.9 MB)\n", + "Collecting importlib-resources\n", + " Downloading importlib_resources-6.1.0-py3-none-any.whl (33 kB)\n", + "Collecting bcrypt>=4.0.1\n", + " Using cached bcrypt-4.0.1-cp36-abi3-macosx_10_10_universal2.whl (473 kB)\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from chromadb) (4.8.0)\n", + "Requirement already satisfied: numpy>=1.22.5 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from chromadb) (1.26.0)\n", + "Collecting 
chroma-hnswlib==0.7.3\n", + " Downloading chroma_hnswlib-0.7.3-cp310-cp310-macosx_10_9_x86_64.whl (219 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m219.6/219.6 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: requests>=2.28 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from chromadb) (2.31.0)\n", + "Collecting fastapi>=0.95.2\n", + " Downloading fastapi-0.103.1-py3-none-any.whl (66 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.2/66.2 kB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting typer>=0.9.0\n", + " Using cached typer-0.9.0-py3-none-any.whl (45 kB)\n", + "Collecting posthog>=2.4.0\n", + " Using cached posthog-3.0.2-py2.py3-none-any.whl (37 kB)\n", + "Collecting pypika>=0.48.9\n", + " Using cached PyPika-0.48.9-py2.py3-none-any.whl\n", + "Collecting uvicorn[standard]>=0.18.3\n", + " Using cached uvicorn-0.23.2-py3-none-any.whl (59 kB)\n", + "Requirement already satisfied: onnxruntime>=1.14.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from chromadb) (1.16.0)\n", + "Requirement already satisfied: overrides>=7.3.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from chromadb) (7.4.0)\n", + "Requirement already satisfied: tokenizers>=0.13.2 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from chromadb) (0.13.3)\n", + "Collecting starlette<0.28.0,>=0.27.0\n", + " Using cached starlette-0.27.0-py3-none-any.whl (66 kB)\n", + "Requirement already satisfied: anyio<4.0.0,>=3.7.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from fastapi>=0.95.2->chromadb) (3.7.1)\n", + "Requirement already satisfied: protobuf in 
/Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (4.24.3)\n", + "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (1.12)\n", + "Requirement already satisfied: flatbuffers in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (23.5.26)\n", + "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (23.1)\n", + "Requirement already satisfied: coloredlogs in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (15.0.1)\n", + "Requirement already satisfied: backoff>=1.10.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (2.2.1)\n", + "Requirement already satisfied: python-dateutil>2.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (2.8.2)\n", + "Collecting monotonic>=1.5\n", + " Using cached monotonic-1.6-py2.py3-none-any.whl (8.2 kB)\n", + "Requirement already satisfied: six>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (1.16.0)\n", + "Requirement already satisfied: certifi in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from pulsar-client>=3.1.0->chromadb) (2023.7.22)\n", + "Requirement already satisfied: pydantic-core==2.10.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from pydantic>=1.9->chromadb) (2.10.1)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in 
/Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from pydantic>=1.9->chromadb) (0.5.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests>=2.28->chromadb) (2.0.5)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.2.0)\n", + "Requirement already satisfied: click<9.0.0,>=7.1.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from typer>=0.9.0->chromadb) (8.1.7)\n", + "Collecting h11>=0.8\n", + " Using cached h11-0.14.0-py3-none-any.whl (58 kB)\n", + "Collecting watchfiles>=0.13\n", + " Using cached watchfiles-0.20.0-cp37-abi3-macosx_10_7_x86_64.whl (417 kB)\n", + "Requirement already satisfied: python-dotenv>=0.13 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (1.0.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (6.0.1)\n", + "Collecting websockets>=10.4\n", + " Using cached websockets-11.0.3-cp310-cp310-macosx_10_9_x86_64.whl (120 kB)\n", + "Collecting httptools>=0.5.0\n", + " Using cached httptools-0.6.0-cp310-cp310-macosx_10_9_x86_64.whl (164 kB)\n", + "Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0\n", + " Using cached uvloop-0.17.0-cp310-cp310-macosx_10_9_x86_64.whl (1.5 MB)\n", + "Requirement already satisfied: sniffio>=1.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from 
anyio<4.0.0,>=3.7.1->fastapi>=0.95.2->chromadb) (1.3.0)\n", + "Requirement already satisfied: exceptiongroup in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from anyio<4.0.0,>=3.7.1->fastapi>=0.95.2->chromadb) (1.1.3)\n", + "Requirement already satisfied: humanfriendly>=9.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from coloredlogs->onnxruntime>=1.14.1->chromadb) (10.0)\n", + "Requirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from sympy->onnxruntime>=1.14.1->chromadb) (1.3.0)\n", + "Installing collected packages: pypika, monotonic, websockets, uvloop, typer, tqdm, pulsar-client, importlib-resources, httptools, h11, chroma-hnswlib, bcrypt, watchfiles, uvicorn, starlette, posthog, fastapi, chromadb\n", + " Attempting uninstall: tqdm\n", + " Found existing installation: tqdm 4.64.1\n", + " Uninstalling tqdm-4.64.1:\n", + " Successfully uninstalled tqdm-4.64.1\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "clarifai 9.8.2 requires tqdm==4.64.1, but you have tqdm 4.66.1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed bcrypt-4.0.1 chroma-hnswlib-0.7.3 chromadb-0.4.13 fastapi-0.103.1 h11-0.14.0 httptools-0.6.0 importlib-resources-6.1.0 monotonic-1.6 posthog-3.0.2 pulsar-client-3.3.0 pypika-0.48.9 starlette-0.27.0 tqdm-4.66.1 typer-0.9.0 uvicorn-0.23.2 uvloop-0.17.0 watchfiles-0.20.0 websockets-11.0.3\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: pandas in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (2.1.1)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from pandas) (2023.3.post1)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from pandas) (2.8.2)\n", + "Requirement already satisfied: tzdata>=2022.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from pandas) (2023.3)\n", + "Requirement already satisfied: numpy>=1.22.4 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from pandas) (1.26.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n", + "\n", + 
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Collecting rank_bm25\n", + " Using cached rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)\n", + "Requirement already satisfied: numpy in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from rank_bm25) (1.26.0)\n", + "Installing collected packages: rank_bm25\n", + "Successfully installed rank_bm25-0.2.2\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Collecting weaviate-client\n", + " Using cached weaviate_client-3.24.1-py3-none-any.whl (107 kB)\n", + "Requirement already satisfied: requests<3.0.0,>=2.30.0 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from weaviate-client) (2.31.0)\n", + "Collecting authlib<2.0.0,>=1.2.1\n", + " Using cached Authlib-1.2.1-py2.py3-none-any.whl (215 kB)\n", + "Collecting validators<1.0.0,>=0.21.2\n", + " Using cached validators-0.22.0-py3-none-any.whl (26 kB)\n", + "Requirement already satisfied: cryptography>=3.2 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from authlib<2.0.0,>=1.2.1->weaviate-client) (41.0.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from 
requests<3.0.0,>=2.30.0->weaviate-client) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests<3.0.0,>=2.30.0->weaviate-client) (3.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests<3.0.0,>=2.30.0->weaviate-client) (2023.7.22)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from requests<3.0.0,>=2.30.0->weaviate-client) (2.0.5)\n", + "Requirement already satisfied: cffi>=1.12 in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from cryptography>=3.2->authlib<2.0.0,>=1.2.1->weaviate-client) (1.15.1)\n", + "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.13/envs/deep-cookie/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=3.2->authlib<2.0.0,>=1.2.1->weaviate-client) (2.21)\n", + "Installing collected packages: validators, authlib, weaviate-client\n", + "Successfully installed authlib-1.2.1 validators-0.22.0 weaviate-client-3.24.1\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "!pip install 'langchain[llms]'\n", + "!pip install Scrapy\n", + "!pip install html2text\n", + "!pip install lxml\n", + "!pip install python-dotenv\n", + "!pip install \"unstructured[all-docs]\"\n", + "!pip install tiktoken\n", + "!pip install faiss-cpu \n", + "!pip install GitPython\n", + "!pip install notebook\n", + 
"!pip install chromadb\n", + "!pip install pandas\n", + "!pip install rank_bm25\n", + "!pip install weaviate-client" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import logging\n", + "from dotenv import load_dotenv\n", + "from IPython.display import display, Markdown, Latex\n", + "\n", + "logging.getLogger().setLevel(logging.INFO)\n", + "load_dotenv()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') \n", + "\n", + "WEAVIATE_API_KEY = os.environ.get('WEAVIATE_API_KEY')\n", + "WEAVIATE_CODEARENA_INDEX_NAME = \"CodeArenaDocsV1\"\n", + "WEAVIATE_URL = os.environ.get('WEAVIATE_URL')\n", + "\n", + "assert OPENAI_API_KEY, \"Please set OPENAI_API_KEY in your environment variables\"\n", + "assert WEAVIATE_API_KEY, \"Please set WEAVIATE_API_KEY in your environment variables\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "C4_WEBSITE_STORAGE_DIR = \"../knowledge_base/c4/website\"\n", + "C4_GH_DOCS_STORAGE_DIR = \"../knowledge_base/c4/gh_docs\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "from langchain.document_loaders import DirectoryLoader\n", + "from langchain.document_loaders import TextLoader\n", + "\n", + "def load_json_files(dir):\n", + " loader = DirectoryLoader(dir, loader_cls=TextLoader)\n", + " documents = loader.load()\n", + " for d in documents:\n", + " page_content_dict = json.loads(d.page_content)\n", + " d.page_content = page_content_dict['md_content']\n", + " d.metadata['url'] = page_content_dict['url']\n", + " return documents\n", + "\n", + 
"c4_website_data_list = load_json_files(C4_WEBSITE_STORAGE_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders import DirectoryLoader\n", + "from langchain.document_loaders import TextLoader\n", + "\n", + "loader = DirectoryLoader(C4_GH_DOCS_STORAGE_DIR, loader_cls=TextLoader)\n", + "c4_gh_docs_data_list = loader.load()\n", + "\n", + "\n", + "for i, d in enumerate(c4_gh_docs_data_list):\n", + " local_path = d.metadata['source']\n", + "\n", + " if \"/README.md\" in local_path:\n", + " # remove README.md from the path\n", + " local_path = local_path.replace(\"/README.md\", \"\")\n", + " \n", + " if \"/SUMMARY.md\" in local_path:\n", + " # remove SUMMARY.md from the path\n", + " local_path = local_path.replace(\"/SUMMARY.md\", \"\")\n", + " \n", + " # remove .md from the path\n", + " local_path = local_path.replace(\".md\", \"\")\n", + "\n", + " d.metadata['url'] = f\"{local_path.replace(C4_GH_DOCS_STORAGE_DIR, 'https://docs.code4rena.com')}\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.text_splitter import (\n", + " RecursiveCharacterTextSplitter,\n", + " Language,\n", + ")\n", + "\n", + "md_splitter = RecursiveCharacterTextSplitter.from_language(\n", + " language=Language.MARKDOWN, chunk_size=2000, chunk_overlap=200\n", + ")\n", + "\n", + "\n", + "website_chunks = md_splitter.split_documents(c4_website_data_list)\n", + "gh_docs_chunks = md_splitter.split_documents(c4_gh_docs_data_list)\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "89" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "website_chunks_with_source = [d.copy(deep=True) for d in website_chunks]\n", + "\n", + "for i, d in enumerate(website_chunks_with_source):\n", 
+ " d.metadata['source'] = f\"{i}-pl\"\n", + "\n", + "website_chunks_offset = len(website_chunks_with_source)\n", + "website_chunks_offset" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "72" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "gh_docs_chunks_with_source = [d.copy(deep=True) for d in gh_docs_chunks]\n", + "\n", + "for i, d in enumerate(gh_docs_chunks_with_source):\n", + " local_path = d.metadata['source']\n", + " d.metadata['source'] = f\"{i+website_chunks_offset}-pl\"\n", + "\n", + "len(gh_docs_chunks_with_source)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import weaviate\n", + "import os\n", + "from langchain.vectorstores import Weaviate\n", + "\n", + "weaviate_client = weaviate.Client(\n", + " url=WEAVIATE_URL,\n", + " auth_client_secret=weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY),\n", + " additional_headers={\"X-OpenAI-Api-Key\": OPENAI_API_KEY},\n", + ")\n", + "weaviate = Weaviate(weaviate_client, WEAVIATE_CODEARENA_INDEX_NAME, text_key='text')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "schema = {\n", + " \"classes\": [\n", + " {\n", + " \"class\": WEAVIATE_CODEARENA_INDEX_NAME,\n", + " \"description\": \"CodeArena docs index\",\n", + " \"vectorizer\": \"text2vec-openai\",\n", + " \"moduleConfig\": {\n", + " \"text2vec-openai\": {\n", + " \"model\": \"ada\",\n", + " \"modelVersion\": \"002\",\n", + " \"type\": \"text\",\n", + " }\n", + " },\n", + " \"properties\": [\n", + " {\n", + " \"dataType\": [\"text\"],\n", + " \"description\": \"The content of the chunk\",\n", + " \"moduleConfig\": {\n", + " \"text2vec-openai\": {\n", + " \"skip\": False,\n", + " \"vectorizePropertyName\": False,\n", + " }\n", + " },\n", + " \"name\": 'text',\n", + " },\n", + " 
{\n", + " \"dataType\": [\"text\"],\n", + " \"description\": \"The source id of the chunk\",\n", + " \"moduleConfig\": {\n", + " \"text2vec-openai\": {\n", + " \"skip\": True,\n", + " \"vectorizePropertyName\": False,\n", + " }\n", + " },\n", + " \"name\": 'source',\n", + " },\n", + " {\n", + " \"dataType\": [\"text\"],\n", + " \"description\": \"The reference url of the chunk\",\n", + " \"moduleConfig\": {\n", + " \"text2vec-openai\": {\n", + " \"skip\": True,\n", + " \"vectorizePropertyName\": False,\n", + " }\n", + " },\n", + " \"name\": 'url',\n", + " },\n", + " ],\n", + " },\n", + " ]\n", + "}\n", + "weaviate_client.schema.create(schema)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['a63885a0-cf89-494f-a3e3-039d1c520f74',\n", + " '96a0876d-9b66-44bd-a9ad-8aff129bad0b',\n", + " '1fa4a305-f40e-4c03-bff9-81d694eba9fd',\n", + " 'b3736a0e-3ef2-441c-b1f5-e2a8161101a6',\n", + " '0730bfa7-c4c6-455c-b62e-88c4f3c16ca0',\n", + " 'f4e9c925-6589-4cf0-898d-a852b15f801f',\n", + " '68528680-e144-4412-a353-2d17dad26cd8',\n", + " '1940f313-042d-4bbd-929c-99071086e8c7',\n", + " '9644f0f0-088f-4638-868e-9e6261741b89',\n", + " '2174791b-b7b4-4fa2-a005-dcb81a113ac4',\n", + " '3c234f73-5396-43bd-8037-ac1dc5e19b6b',\n", + " '5f3fb3f7-c8c8-4844-8fa1-78ba10e0f738',\n", + " '10230bb9-df61-4243-a895-602fa8bf7a9a',\n", + " '81c2942c-3aa0-4362-803d-b164958ba319',\n", + " '757d77c8-0908-4fa2-878b-06ed3f069148',\n", + " 'daad18e5-4ae1-429f-9f8d-e6d14cdce071',\n", + " '3fd6fe74-a81f-46f5-a1da-ae3e672d0871',\n", + " '100af503-5a7a-46ee-b8fa-91e417033f71',\n", + " 'a477f817-53c0-467f-86ca-29a26a59b726',\n", + " 'd2d5942b-04ba-4aac-abb5-1576582b5e78',\n", + " 'f9595f9c-6268-4345-a733-385867230b78',\n", + " 'd24b3b0a-31dd-457b-b779-906b98bae359',\n", + " 'a1b6ce72-a349-495c-8b20-d789b40efaee',\n", + " '2dc16dfd-e1cd-40de-af2c-156aafaa0fa9',\n", + " '73c493f0-e53f-4fe9-a155-fe9dde98eec7',\n", + " 
'5a9f4df9-5cb0-4c0b-a883-374d25db933c',\n", + " '2d1656b9-cfd9-4a32-a136-40823cc40711',\n", + " '87a85b7c-5d6f-41c6-85dd-20b52220fc4e',\n", + " '1459b37b-9b60-45a4-8c1e-13d8ed34d6b5',\n", + " '088e5bb6-ffde-4889-af89-5306191793f1',\n", + " 'f1ec6dbf-9c77-4f0a-9a81-1098a3e9980a',\n", + " '9d7c6bac-5197-42cf-9a9d-a4ee1ca2c176',\n", + " 'd52146ca-1684-4c42-adf6-2e662b133392',\n", + " '0333e01f-197c-4c04-8786-2baed72f5289',\n", + " 'd2163d14-4055-4c26-a4b5-f225fad89c93',\n", + " '97d96a08-80a0-4832-869f-9d07da317509',\n", + " '0dad45ff-4317-4976-addd-4cbdcb9784e7',\n", + " 'd6617d4d-f027-41c2-bba2-1e216cd51f6f',\n", + " '686bde0d-e1fd-4816-9623-1fc62f9376a8',\n", + " '7681d155-68d1-4864-b0f6-33864f2ef074',\n", + " '395ea536-f2ea-4a3e-bfc3-c9addcdab45c',\n", + " 'cbb16a91-5070-4410-bdcc-752ba17af263',\n", + " 'e0939e0d-a60f-4339-9f32-2045b98a7dc2',\n", + " 'e0aa0516-9798-4378-ad8e-7e89f7c3e52e',\n", + " '1ca33e66-5746-4b2a-a837-9e63710154d8',\n", + " 'fd541528-fa8f-4bb9-9cde-2458df2b91da',\n", + " 'a429291c-6113-44b3-a203-eecb7f1f85b2',\n", + " '930af124-b295-4ba6-a341-5363cd5a85bf',\n", + " 'e2024d0c-c8ec-45c9-830f-9cbc905e8972',\n", + " 'a9cbc945-490c-4257-9472-2f572466d0b6',\n", + " 'ba70fdd0-a9ab-4f6a-8a8e-8674b3593a6e',\n", + " '1837ab1e-554b-44df-93eb-50621986da23',\n", + " 'e413df9a-4fdf-4827-8bf6-feb23766c8d4',\n", + " '4c84934b-2c24-424c-b9ea-d3c10bcc4f4b',\n", + " '11fe600d-878b-4273-b955-5a100b1ef6ed',\n", + " 'a934d0bc-0c15-426d-9197-ceb8d1a61226',\n", + " '9843107e-5fc4-46b7-a078-6b8a761e3eb7',\n", + " 'd918ad41-b0fd-4d1a-9e58-311e32913844',\n", + " 'a9d4624f-6f4a-4864-85dd-f89bed48e1d2',\n", + " '5e2e86ea-554b-4af6-ab8d-3f906329870c',\n", + " 'b3a2c8a9-58a9-4af1-a49e-423888e29b6a',\n", + " '84112e47-26d3-43aa-a3ed-2e3099250638',\n", + " 'bc867bf7-cbb9-4f35-8be7-196fe7ec8418',\n", + " 'ba08d2a4-2d4e-4ebc-bb4b-ad1facb7c858',\n", + " '6c40dc6e-0213-4744-84b3-d7f2bedb3b57',\n", + " '955976ea-6e51-41cf-80b3-ea5f4728c342',\n", + " 
'eaa1ea31-49b5-4a0b-b0c4-a03b4d716411',\n", + " '8d442b90-ab80-4db3-bcc7-b6e5fb012555',\n", + " 'f4ed46ed-2c3a-496b-8c11-b1c3c76c5c42',\n", + " '7993dc48-764e-4d21-89a3-1ba964882ef8',\n", + " '023b11be-e4d4-461b-b980-2d38bc78c230',\n", + " 'e5e108ae-8130-458e-ae73-759a1bd320ca',\n", + " '80c72ad3-425c-4b49-a0f7-88f104386af1',\n", + " 'b4781a6a-219f-4216-87d7-462d1749a606',\n", + " '95db2fa1-862c-4f31-afc2-6a31a8e24cbf',\n", + " '1759f699-c940-4292-a8ef-88d62a76e2cf',\n", + " '32ac5faa-bf97-4118-98d1-22238af3f748',\n", + " '1a6e136e-2505-4a9b-9e88-540ecda071d2',\n", + " '5331c6e3-5f31-4e32-8317-7e1d7c79518c',\n", + " '270e19f4-77d5-4664-9e76-7414836900bd',\n", + " 'd977763b-76c6-4f6d-99aa-205822f1a7c2',\n", + " 'ed85017b-e410-4b24-8de8-7919a19ca3d9',\n", + " '55d919c6-b60a-4d0e-92c9-0035a241e43c',\n", + " 'c9a77dd5-009c-4512-85e6-bacdac97f624',\n", + " 'c71fc845-fc7a-4386-9af7-de5c7ef9fa68',\n", + " 'c2978346-47d0-4019-8d2b-848e7c94d057',\n", + " '890e63dd-1583-4dc4-9381-b60713672bda',\n", + " '01d5e8db-1db1-4b94-9fc9-22d301e5c27e',\n", + " '7acd189c-398a-464b-943d-3d742f7f0c7d',\n", + " '85fc7bb0-9828-43e3-800f-4eb16db16d86',\n", + " '49856698-f546-445e-abe2-94f492a0e5db',\n", + " 'd5876920-3410-4a0f-a7f1-546588b5c846',\n", + " '34071b42-0f80-468e-9912-4f7efb127324',\n", + " '03898f1a-b0a6-45a9-82e5-de653fbd48ef',\n", + " 'dd11c728-bf66-4a6e-a4a5-0fb1a7172f6d',\n", + " '49bc2aa8-7505-416b-a107-2cfeb0e2475b',\n", + " '83462ba1-261a-4078-aaee-fdd7fa9c0735',\n", + " '95dafc56-48ed-4213-a91d-9b6a2e91cafe',\n", + " '566aa980-ac0d-4b8b-9baa-0249c7b48e0b',\n", + " 'd4e86eda-9988-4357-92c6-0cbadd5285d6',\n", + " '845dd1d1-638b-42d5-8727-37dd95e17d41',\n", + " 'b032e6ac-e159-4357-af81-01097b7122d3',\n", + " 'a74663d0-ab5e-4039-88f5-61e8cbb8d9c1',\n", + " 'e178b656-f492-4c2b-9a71-a165f8894c41',\n", + " '6496ab6f-c469-4771-9d3f-d79a7b2e00bc',\n", + " '322d5a59-3b6b-4f74-9b52-5bf84db81c7f',\n", + " 'be41ba3a-8de9-451e-8394-b93d6a2572b7',\n", + " 
'eeed2d9e-da61-4861-aee6-518a3690dae1',\n", + " 'c7ccabba-4660-49e9-856d-7de29ad87160',\n", + " '33c166fa-a06b-4ec6-8930-256ce2e496c5',\n", + " '4f3a1ede-7a7c-40d8-b529-1c406b9189da',\n", + " '8f104fae-2d5b-4c6f-bdd3-7855006ec404',\n", + " '15d13302-253b-4c5d-a853-d6c23aa0e19c',\n", + " '48e6d4bb-990c-4899-b175-ce83d6198a4e',\n", + " '0ad4d813-2325-4f37-b37d-b4aafacd3700',\n", + " 'c75418f9-ec4c-4ea8-81ba-d2b265be735b',\n", + " 'e77714b5-c6fc-41b5-8f31-109843025c1f',\n", + " 'ddc0715d-523f-4a70-8889-0885e8ebbcf5',\n", + " '74f1d6ec-aaab-4260-ad33-638cc8967d1a',\n", + " '3d2a3793-0bf8-485a-a84a-a29ec091f1fc',\n", + " 'a5ae063d-a67f-4193-b0e1-d5ffb087c616',\n", + " '7106d46f-015a-44b6-a0f3-5ecd3b581bb0',\n", + " '7ca23938-8661-4613-940f-234ef91f35c5',\n", + " 'edb872f5-df6b-4970-a0b3-c573941c48e2',\n", + " 'e8e6d68d-5839-485f-bb17-19dca5cc7492',\n", + " 'f065852b-0459-44c6-a1f4-8b16ddbd7b5f',\n", + " '737bd536-88b0-4f3e-a17c-189a1975338c',\n", + " 'a96c26d7-5044-4db5-bb4c-f7acd1c1fac0',\n", + " 'afd978b9-9c23-4120-a786-fa06e4611f73',\n", + " 'c1697fd1-df41-4a12-a266-06c84d46147b',\n", + " '00b491b4-06af-408c-b41d-410148af4bf5',\n", + " '463c4991-2cba-449c-9062-c5c954f7f15c',\n", + " '930c5063-8355-49e4-ba62-1d4b7cd8baf3',\n", + " '9128e4ba-65a6-48d9-bca2-08cadc678817',\n", + " 'fd608c37-2d8a-4764-9730-aa8c0eff398b',\n", + " '49304a63-9dde-4826-a623-df75148ed917',\n", + " '480913d2-6452-4379-9142-9aced64826c0',\n", + " '5a071536-30eb-4db7-8e7c-6376eac7a075',\n", + " 'f4c8f66d-a885-46b3-8416-52b5437d047b',\n", + " 'e8a859be-5577-4b92-9863-8fed92ff6060',\n", + " 'a1997afd-5606-469b-92a0-8131fc42328e',\n", + " '48b6a174-1689-406c-ba95-3c61fad4e7c1',\n", + " 'ed846d69-ae46-4298-b67b-7a021b5f3036',\n", + " 'f6f0fad0-c13b-4bce-970f-df5007220af4',\n", + " '26c9f797-e1d3-4a55-9730-258a1750c23c',\n", + " 'ac5b7bb3-ac20-48d1-87cc-a796572a87fe',\n", + " '2d2f1b5c-4f0b-4cc0-94a0-f1e384dc3777',\n", + " '2e2563c3-14b6-4d75-8df3-46d400d1b26b',\n", + " 
'47b2a141-ae96-4a0d-a9b0-600f507278f3',\n", + " 'a71a7da4-9074-4ab6-8d79-31b9e20fc578',\n", + " '1b256b2d-2322-4a7e-8463-2a46725ce1fe',\n", + " '4b1b5e87-05f6-4ef4-8bd3-9ce32fb8ad68',\n", + " '2dd27a08-3701-4715-b7f2-904edded27a0',\n", + " 'e2f7e8a1-bdf4-44ae-a377-ee4d78187b6d',\n", + " '1207be77-1875-460d-bc41-fa5c6b41245b',\n", + " '53134e15-f8f0-4a2f-9d11-26562fcb92be',\n", + " '35d8d702-3a80-431e-81e1-f96a3819a6ef',\n", + " 'f5f632b4-1cab-4a51-88a8-6d494a07e3de',\n", + " '3e912ec6-1451-431c-8617-8d13a8d2db3c',\n", + " '6cdd3b47-17a9-4474-8f9d-6b5e6afd60c3',\n", + " '5457093a-d355-4a09-841e-a785ef114917']" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "weaviate_client.schema.delete_class(WEAVIATE_CODEARENA_INDEX_NAME)\n", + "weaviate.add_documents(website_chunks_with_source + gh_docs_chunks_with_source)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'data': {'Get': {'CodeArenaDocsV1': [{'source': '157-pl', 'text': \"### How can I become a Judge?\\n\\nComplete [this form](https://code4rena.com/judge-application/) and share: Short bio/intro and summary of relevant experience, links that help demonstrate your expertise, 3 example submissions to Code4rena contests that were judged high severity, description of how each submission demonstrates your depth of knowledge.\\n\\n### How can I become a Scout?\\n\\nYou can’t, just yet! Right now, Scouts are hand-picked by the C4 team as it’s a highly sensitive role. We’re looking at the possibility of opening up this process, but not in the near future.\\n\\n### I want Code4rena to audit my project, where do I start?\\n\\nIt’s really simple! Just visit [this link](https://code4rena.typeform.com/i-want-an-audit) and fill out the form. 
Our team will be in touch with you shortly after you’ve completed it.\\n\\n### Do you have a blog?\\n\\nWe do indeed, [here](https://medium.com/code-423n4). We post product updates, sponsor interviews and more.\\n\\n### What’s the best way to stay up to date with Code4rena?\\n\\n[Follow us](https://twitter.com/code4rena) on Twitter and turn on notifications in our #announcements channel in [Discord](https://discord.gg/code4rena). \\n\\n### Where can I view the audit results?\\n\\nOnce an audit's results have been finalized, they’ll be shared in our #announcements channel in Discord. The audit's page in the ‘[Audits](https://code4rena.com/contests)’ section on our website will also be updated to show results.\\n\\n### Where can I read past Code4rena audit reports?\\n\\nWe push all public audit reports to the ‘[Reports](https://code4rena.com/reports)’ section on our website. This enables you to read through past findings, evaluate commonalities, and continue your learning journey. Our [GitHub](https://github.com/code-423n4/) repos are also public, for those of you interested in diving deeper.\", 'url': 'https://docs.code4rena.com/structure/frequently-asked-questions'}, {'source': '10-pl', 'text': \"Introducing Code4rena Profiles: a solo auditor's highlight reel.[Learn more\\n->](/how-it-works/wardens)\\n\\nSkip Navigation\\n\\n[](/)\\n\\n[How it works](/how-it-\\nworks)[Leaderboard](/leaderboard)[Audits](/contests)[Reports](/reports)[Docs](https://docs.code4rena.com)[Help](/help)\\n\\nConnect\\n\\nMetaMaskWalletConnectLog inRegister\\n\\nConnect MetaMaskConnect WalletConnectLog in[Register](/register)\\n\\n# How it works\\n\\n## Roles\\n\\n### Warden\\n\\nWardens protect the web3 ecosystem from threats by auditing code.\\n\\n[Learn more about Wardens](/how-it-works/wardens)\\n\\n### Judge\\n\\nJudges decide the severity, validity, and quality of findings and rate the\\nperformance of wardens.\\n\\n### Sponsor\\n\\nSponsors create prize pools to attract wardens to 
audit their projects.\\n\\n### Scout\\n\\nScouts focus on scoping and pre-audit intel.\\n\\n### Lookout\\n\\nLookouts review and organize submissions to Code4rena's audits.\\n\\n## Audit Types\\n\\n### Open\\n\\nThis is the standard competitive audit format on Code4rena where everyone is\\ninvited to participate and all information is open and public.\\n\\n[Learn more about open competitions](https://medium.com/code-423n4/a-look-at-\\ncode4rena-audits-open-1a8e74e558c8)\\n\\n### Private\\n\\nThis type of audit is restricted to Wardens who have met the conditions of the\\nCode4rena [Certified Contributor](https://docs.code4rena.com/roles/certified-\\ncontributors) program, which includes a Non-Disclosure Agreement. With\\ncustomizations available for your privacy needs, Code4rena can offer you an\\naudit that is as stealthy as you'd like.\\n\\n[Learn more about Private audits](https://medium.com/code4rena/a-look-at-\\ncode4rena-audits-classified-3ee3cbe87617)\", 'url': 'https://code4rena.com/how-it-works'}, {'source': '152-pl', 'text': '### Getting an audit is courageous\\n\\nAn audit is an investment in a better long-term outcome for both the project undergoing the audit as well as its users and the community as a whole. \\n\\nIt requires tremendous courage to ask someone to look closely and find as many places where code could be improved. \\n\\nBecause of this, we treat sponsor projects with respect. We consider every finding discovered in an audit contest as a tool that we can use in the future to help others learn and better understand smart contract vulnerabilities. 
\\n\\n### Both wardens and sponsors have a voice in the process\\n\\nWe take sponsors’ feelings into consideration, working to help find ways to communicate and disclose the results of audits in a way that works as best as we can to eliminate the “blame and shame” approach and replace it with one that honors sponsors’ contribution toward making their project and the DeFi space more secure for everyone.\\n\\nOur contest process is transparent, with all issues reviewable on GitHub. We put an impartial judge in the position of listening to all players’ positions on a given issue and making a final determination.\\n\\n### We are a community\\n\\nWe trust that a community-driven approach with valuable incentives ensures enough coverage to give sponsors a meaningful and valuable audit without putting the burden on any one person to do a ’good enough job’ catching every bug—a burden that burns out traditional auditors.\\n\\nOur community chose the wolf as our symbol, and it’s fitting. Wolves are highly collaborative creatures who hunt in packs.\\n\\nRather than putting the pressure on individual auditors to catch all the vulnerabilities in a project, we ask competitors to catch the bugs that they can, and trust that the community will show up and contribute.', 'url': 'https://docs.code4rena.com/philosophy/security-is-about-people'}, {'source': '147-pl', 'text': '# **Lookouts**\\n\\nLookouts review and organize submissions to Code4rena\\'s competitions, focusing on a) lightening and clarifying the project team’s workload, and b) preparing the repo for judging.\\n\\nLookouts receive a guaranteed payout from each audit they work on. In the interest of impartiality, they must forgo any awards they would have received for their own submissions as wardens. 
Therefore, the role is typically assigned prior to the competition\\'s start.\\n\\n# **Becoming a Lookout**\\n\\nTo become a Lookout, you may be nominated by a Judge or Lookout in good standing, or nominate yourself.\\n\\n### **Minimum criteria**\\n\\n- Compete in at least 3 Code4rena audits;\\n- Be a [Certified C4 contributor](/roles/certified-contributors) in good standing;\\n- Find at least 1 high severity finding OR 3 medium severity findings OR score A on at least 3 QA or Gas reports;\\n\\n### **Non-technical criteria**\\n\\n- **Sense of fairness**—i.e. evidence suggests you don\\'t show favoritism, but instead aim for a fair competition where quality is rewarded.\\n- **Clear written communication**—your English does not need to be perfect, but you should be able to engage in technical discussions with judges and sponsors via written English.\\n\\n# How to apply\\n\\nComplete [this application form](https://code4rena.com/lookout-application/) and share:\\n\\n- Short bio/intro and summary of relevant experience\\n- Links that help demonstrate your expertise\\n- 3 example submissions to Code4rena audits that you’re especially proud of\\n- Description of how each submission demonstrates your depth of knowledge\\n\\n**Note:** Lookout applications are reviewed during a one-week period each month. Notices of application and review windows will be posted in the C4 Discord server.\\n\\n**Lookout selection process**\\n\\nBeing a lookout is a critical role and we only have so many spots.\\n\\nLookout applications are reviewed monthly by a group of C4 judges and lookouts. 
The group will review your application and give you a \"yes\" or \"not yet\".', 'url': 'https://docs.code4rena.com/roles/certified-contributors/lookouts'}]}}}\n" + ] + } + ], + "source": [ + "query_result = weaviate_client.query\\\n", + " .get(WEAVIATE_CODEARENA_INDEX_NAME, [\"text\", \"source\", \"url\"])\\\n", + " .with_hybrid(\n", + " query=\"What is Scout\"\n", + " )\\\n", + " .with_limit(4)\\\n", + " .do()\n", + "\n", + "print(query_result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "c4-chatbot", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/qa_bot/notebooks/experiment_c4_qa_bot.ipynb b/qa_bot/notebooks/experiment_c4_qa_bot.ipynb new file mode 100644 index 0000000..7f842f7 --- /dev/null +++ b/qa_bot/notebooks/experiment_c4_qa_bot.ipynb @@ -0,0 +1,1463 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CodeArena (C4) Question Answer bot\n", + "\n", + "### Objective\n", + "- This notebook has the PoC work for a Question Answer bot using C4's knowledge bases.\n", + "- The objective of the PoC is to prototype an LLM implementation that can accurately answer questions to their expectation and at the very least perform better than their current bot from [Mava](https://www.mava.app/)\n", + "\n", + "### Observations from the usage of Mava\n", + "- The platform offers Discord support management with ticketing and AI help bot features\n", + "- For the AI help bot, the user is able to specify links to multiple knowledge sources that can be used for answering 
questions.\n", + "- Based on C4's testing of the Mava bot in the private channel, the following stats were observed:\n", + " - Total questions asked: 29\n", + " - Total questions mis-answered based on emoji reactions: 13\n", + " - Accuracy: ~55% (16 of 29 answered correctly)\n", + "\n", + "### Knowledge Bases\n", + "Based on conversations with their team, the following knowledge bases were identified to be relevant and are the same ones that Mava is using:\n", + "- [Main Website](https://code4rena.com/)\n", + "- [Docs](https://docs.code4rena.com/) \n", + "\n", + "\n", + "### High-level Approach\n", + "- Crawl and scrape C4’s website and docs using the Scrapy library\n", + "- Convert the HTML content to Markdown so that the model can better understand the context\n", + "- Use the LangChain library to do the following:\n", + " - Split the Markdown header-separated sections into semantic chunks\n", + " - Embed and store the semantic chunks in an in-memory vector database\n", + " - Use retrieval-augmented generation to answer the question" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: langchain[llms] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.0.278)\n", + "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (8.2.3)\n", + "Requirement already satisfied: PyYAML>=5.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (6.0.1)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.0.20)\n", + "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.0.3)\n", + "Requirement already satisfied: dataclasses-json<0.6.0,>=0.5.7 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.5.14)\n", + "Requirement already satisfied: langsmith<0.1.0,>=0.0.21 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.31)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (3.8.5)\n", + "Requirement already satisfied: numexpr<3.0.0,>=2.8.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.8.5)\n", + "Requirement already satisfied: numpy<2,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.25.2)\n", + "Requirement already satisfied: pydantic<3,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.10.12)\n", + "Requirement already satisfied: requests<3,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.31.0)\n", + "Requirement already satisfied: openlm<0.0.6,>=0.0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.5)\n", + "Requirement already satisfied: torch<3,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (2.0.1)\n", + "Requirement already satisfied: openai<1,>=0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.28.0)\n", + "Requirement already satisfied: huggingface_hub<1,>=0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.16.4)\n", + "Requirement already satisfied: manifest-ml<0.0.2,>=0.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (0.0.1)\n", + "Requirement already satisfied: cohere<5,>=4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.21)\n", + "Requirement already satisfied: transformers<5,>=4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (4.32.1)\n", + "Requirement already satisfied: clarifai>=9.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (9.7.6)\n", + "Requirement already satisfied: nlpcloud<2,>=1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from langchain[llms]) (1.1.44)\n", + "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (3.2.0)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.9.2)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.3.1)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (1.4.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (6.0.4)\n", + 
"Requirement already satisfied: attrs>=17.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain[llms]) (23.1.0)\n", + "Collecting tqdm==4.64.1\n", + " Using cached tqdm-4.64.1-py2.py3-none-any.whl (78 kB)\n", + "Requirement already satisfied: tritonclient==2.34.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (2.34.0)\n", + "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (23.1)\n", + "Requirement already satisfied: clarifai-grpc>=9.7.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (9.7.6)\n", + "Requirement already satisfied: rich==13.4.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai>=9.1.0->langchain[llms]) (13.4.2)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (2.16.1)\n", + "Requirement already satisfied: python-rapidjson>=0.9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tritonclient==2.34.0->clarifai>=9.1.0->langchain[llms]) (1.10)\n", + "Requirement already satisfied: fastavro==1.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (1.8.2)\n", + "Requirement already satisfied: backoff<3.0,>=2.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (2.2.1)\n", + "Requirement already satisfied: urllib3<3,>=1.26 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (2.0.4)\n", + "Requirement already satisfied: importlib_metadata<7.0,>=6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cohere<5,>=4->langchain[llms]) (6.8.0)\n", + "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (0.9.0)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (3.20.1)\n", + "Requirement already satisfied: filelock in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (3.12.3)\n", + "Requirement already satisfied: fsspec in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (2023.6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface_hub<1,>=0->langchain[llms]) (4.7.1)\n", + "Requirement already satisfied: sqlitedict>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (2.1.0)\n", + "Requirement already satisfied: dill>=0.3.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (0.3.7)\n", + "Requirement already satisfied: redis>=4.3.1 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from manifest-ml<0.0.2,>=0.0.1->langchain[llms]) (5.0.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests<3,>=2->langchain[llms]) (2023.7.22)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests<3,>=2->langchain[llms]) (3.4)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain[llms]) (2.0.2)\n", + "Requirement already satisfied: networkx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (3.1)\n", + "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (3.1.2)\n", + "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch<3,>=1->langchain[llms]) (1.12)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (0.3.3)\n", + "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (0.13.3)\n", + "Requirement already satisfied: regex!=2019.12.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers<5,>=4->langchain[llms]) (2023.8.8)\n", + "Requirement already satisfied: grpcio>=1.44.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (1.57.0)\n", + "Requirement already satisfied: googleapis-common-protos>=1.53.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (1.60.0)\n", + "Requirement already satisfied: protobuf>=3.20.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from clarifai-grpc>=9.7.4->clarifai>=9.1.0->langchain[llms]) (4.24.2)\n", + "Requirement already satisfied: zipp>=0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from importlib_metadata<7.0,>=6.0->cohere<5,>=4->langchain[llms]) (3.16.2)\n", + "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.6.0,>=0.5.7->langchain[llms]) (1.0.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jinja2->torch<3,>=1->langchain[llms]) (2.1.3)\n", + "Requirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from sympy->torch<3,>=1->langchain[llms]) (1.3.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich==13.4.2->clarifai>=9.1.0->langchain[llms]) (0.1.2)\n", + "Installing collected packages: tqdm\n", + " Attempting uninstall: tqdm\n", + " Found existing installation: tqdm 4.66.1\n", + " Uninstalling tqdm-4.66.1:\n", + " Successfully uninstalled tqdm-4.66.1\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "chromadb 0.4.8 requires tqdm>=4.65.0, but you have tqdm 4.64.1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed tqdm-4.64.1\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: Scrapy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (2.10.1)\n", + "Requirement already satisfied: protego>=0.1.15 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (0.3.0)\n", + "Requirement already satisfied: parsel>=1.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.8.1)\n", + "Requirement already satisfied: itemloaders>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.1.0)\n", + "Requirement already satisfied: zope.interface>=5.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (6.0)\n", + "Requirement already satisfied: setuptools in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (63.2.0)\n", + "Requirement already satisfied: lxml>=4.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (4.9.3)\n", + "Requirement already satisfied: Twisted<23.8.0,>=18.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (22.10.0)\n", + "Requirement already satisfied: itemadapter>=0.1.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (0.8.0)\n", + "Requirement already satisfied: PyDispatcher>=2.0.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (2.0.7)\n", + "Requirement already satisfied: service-identity>=18.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.1.0)\n", + "Requirement already satisfied: w3lib>=1.17.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (2.1.2)\n", + "Requirement already satisfied: cssselect>=0.9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.2.0)\n", + "Requirement already satisfied: pyOpenSSL>=21.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.2.0)\n", + "Requirement already satisfied: cryptography>=36.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (41.0.3)\n", + "Requirement already satisfied: queuelib>=1.4.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (1.6.2)\n", + "Requirement already satisfied: tldextract in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (3.4.4)\n", + "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Scrapy) (23.1)\n", + "Requirement already satisfied: cffi>=1.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cryptography>=36.0.0->Scrapy) (1.15.1)\n", + "Requirement already satisfied: jmespath>=0.9.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from itemloaders>=1.0.1->Scrapy) (1.0.1)\n", + "Requirement already satisfied: 
pyasn1-modules in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (0.3.0)\n", + "Requirement already satisfied: pyasn1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (0.5.0)\n", + "Requirement already satisfied: attrs>=19.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from service-identity>=18.1.0->Scrapy) (23.1.0)\n", + "Requirement already satisfied: Automat>=0.8.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (22.10.0)\n", + "Requirement already satisfied: hyperlink>=17.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (21.0.0)\n", + "Requirement already satisfied: incremental>=21.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (22.10.0)\n", + "Requirement already satisfied: typing-extensions>=3.6.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (4.7.1)\n", + "Requirement already satisfied: constantly>=15.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Twisted<23.8.0,>=18.9.0->Scrapy) (15.1.0)\n", + "Requirement already satisfied: requests-file>=1.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (1.5.1)\n", + "Requirement already satisfied: idna in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (3.4)\n", + "Requirement already satisfied: filelock>=3.0.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) 
(3.12.3)\n", + "Requirement already satisfied: requests>=2.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tldextract->Scrapy) (2.31.0)\n", + "Requirement already satisfied: six in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from Automat>=0.8.0->Twisted<23.8.0,>=18.9.0->Scrapy) (1.16.0)\n", + "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=36.0.0->Scrapy) (2.21)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (2023.7.22)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (2.0.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.1.0->tldextract->Scrapy) (3.2.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: html2text in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (2020.1.16)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + 
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: lxml in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (4.9.3)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: python-dotenv in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (1.0.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: unstructured[all-docs] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.10.11)\n", + "Requirement already satisfied: beautifulsoup4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (4.12.2)\n", + "Requirement already satisfied: nltk in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.8.1)\n", + "Requirement already satisfied: tabulate in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.9.0)\n", + "Requirement already satisfied: dataclasses-json in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.5.14)\n", + "Requirement already satisfied: chardet in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (5.2.0)\n", + "Requirement already satisfied: filetype in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.2.0)\n", + "Requirement already satisfied: emoji in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.8.0)\n", + "Requirement already satisfied: requests in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.31.0)\n", + "Requirement already satisfied: python-magic in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.4.27)\n", + "Requirement already satisfied: lxml in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (4.9.3)\n", + "Requirement already satisfied: msg-parser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.2.0)\n", + "Requirement already satisfied: markdown in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (3.4.4)\n", + "Requirement already satisfied: pandas in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.1.0)\n", + "Requirement already satisfied: python-pptx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.6.22)\n", + "Requirement already satisfied: openpyxl in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
unstructured[all-docs]) (3.1.2)\n", + "Requirement already satisfied: python-docx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.8.11)\n", + "Requirement already satisfied: pdf2image in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.16.3)\n", + "Requirement already satisfied: xlrd in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (2.0.1)\n", + "Requirement already satisfied: unstructured-inference in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.5.19)\n", + "Requirement already satisfied: ebooklib in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (0.18)\n", + "Requirement already satisfied: pypandoc in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (1.11)\n", + "Requirement already satisfied: Pillow<10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (9.5.0)\n", + "Requirement already satisfied: pdfminer.six in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured[all-docs]) (20221105)\n", + "Requirement already satisfied: soupsieve>1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from beautifulsoup4->unstructured[all-docs]) (2.4.1)\n", + "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from dataclasses-json->unstructured[all-docs]) (0.9.0)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
dataclasses-json->unstructured[all-docs]) (3.20.1)\n", + "Requirement already satisfied: six in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ebooklib->unstructured[all-docs]) (1.16.0)\n", + "Requirement already satisfied: olefile>=0.46 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from msg-parser->unstructured[all-docs]) (0.46)\n", + "Requirement already satisfied: regex>=2021.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (2023.8.8)\n", + "Requirement already satisfied: tqdm in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (4.64.1)\n", + "Requirement already satisfied: click in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (8.1.7)\n", + "Requirement already satisfied: joblib in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nltk->unstructured[all-docs]) (1.3.2)\n", + "Requirement already satisfied: et-xmlfile in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from openpyxl->unstructured[all-docs]) (1.1.0)\n", + "Requirement already satisfied: tzdata>=2022.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2023.3)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2023.3)\n", + "Requirement already satisfied: numpy>=1.22.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (1.25.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas->unstructured[all-docs]) (2.8.2)\n", + "Requirement already satisfied: cryptography>=36.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pdfminer.six->unstructured[all-docs]) (41.0.3)\n", + "Requirement already satisfied: charset-normalizer>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pdfminer.six->unstructured[all-docs]) (3.2.0)\n", + "Requirement already satisfied: XlsxWriter>=0.5.7 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from python-pptx->unstructured[all-docs]) (3.1.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (3.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (2023.7.22)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests->unstructured[all-docs]) (2.0.4)\n", + "Requirement already satisfied: huggingface-hub in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.16.4)\n", + "Requirement already satisfied: transformers>=4.25.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (4.32.1)\n", + "Requirement already satisfied: opencv-python!=4.7.0.68 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (4.8.0.76)\n", + "Requirement already satisfied: python-multipart in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.0.6)\n", + "Requirement already satisfied: layoutparser[layoutmodels,tesseract] in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (0.3.4)\n", + "Requirement already satisfied: onnxruntime in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from unstructured-inference->unstructured[all-docs]) (1.15.1)\n", + "Requirement already satisfied: cffi>=1.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cryptography>=36.0.0->pdfminer.six->unstructured[all-docs]) (1.15.1)\n", + "Requirement already satisfied: packaging>=17.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->unstructured[all-docs]) (23.1)\n", + "Requirement already satisfied: filelock in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (3.12.3)\n", + "Requirement already satisfied: pyyaml>=5.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (6.0.1)\n", + "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (0.13.3)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from transformers>=4.25.1->unstructured-inference->unstructured[all-docs]) (0.3.3)\n", + "Requirement already satisfied: fsspec in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface-hub->unstructured-inference->unstructured[all-docs]) (2023.6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from huggingface-hub->unstructured-inference->unstructured[all-docs]) (4.7.1)\n", + "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json->unstructured[all-docs]) (1.0.0)\n", + "Requirement already satisfied: iopath in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.1.10)\n", + "Requirement already satisfied: scipy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.11.2)\n", + "Requirement already satisfied: pdfplumber in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.10.2)\n", + "Requirement already satisfied: torch in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.0.1)\n", + "Requirement already satisfied: torchvision in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.15.2)\n", + "Requirement already satisfied: effdet in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.4.1)\n", + "Requirement already satisfied: pytesseract in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.3.10)\n", + "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (1.12)\n", + "Requirement already satisfied: coloredlogs in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (15.0.1)\n", + "Requirement already satisfied: flatbuffers in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (23.5.26)\n", + "Requirement already satisfied: protobuf in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime->unstructured-inference->unstructured[all-docs]) (4.24.2)\n", + "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six->unstructured[all-docs]) (2.21)\n", + "Requirement already satisfied: humanfriendly>=9.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from coloredlogs->onnxruntime->unstructured-inference->unstructured[all-docs]) (10.0)\n", + "Requirement already satisfied: omegaconf>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.3.0)\n", + "Requirement already satisfied: pycocotools>=2.0.2 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.0.7)\n", + "Requirement already satisfied: timm>=0.9.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.9.6)\n", + "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.1.2)\n", + "Requirement already satisfied: networkx in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.1)\n", + "Requirement already satisfied: portalocker in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from iopath->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.7.0)\n", + "Requirement already satisfied: pypdfium2>=4.18.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pdfplumber->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.19.0)\n", + "Requirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from sympy->onnxruntime->unstructured-inference->unstructured[all-docs]) (1.3.0)\n", + "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from omegaconf>=2.0->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.9.3)\n", + "Requirement already satisfied: matplotlib>=2.1.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.7.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jinja2->torch->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (2.1.3)\n", + "Requirement already satisfied: pyparsing<3.1,>=2.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (3.0.9)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.1.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (4.42.1)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (0.11.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from matplotlib>=2.1.0->pycocotools>=2.0.2->effdet->layoutparser[layoutmodels,tesseract]->unstructured-inference->unstructured[all-docs]) (1.4.5)\n", + "\n", + 
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: tiktoken in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.4.0)\n", + "Requirement already satisfied: regex>=2022.1.18 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tiktoken) (2023.8.8)\n", + "Requirement already satisfied: requests>=2.26.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from tiktoken) (2.31.0)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (2.0.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (3.2.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (2023.7.22)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (3.4)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade 
pip\u001b[0m\n", + "Requirement already satisfied: faiss-cpu in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (1.7.4)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: GitPython in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (3.1.33)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from GitPython) (4.0.10)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from gitdb<5,>=4.0.1->GitPython) (5.0.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: notebook in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (7.0.3)\n", + "Requirement already satisfied: jupyterlab-server<3,>=2.22.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (2.24.0)\n", + "Requirement already satisfied: notebook-shim<0.3,>=0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (0.2.3)\n", + "Requirement already satisfied: jupyter-server<3,>=2.4.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (2.7.3)\n", + "Requirement already satisfied: jupyterlab<5,>=4.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (4.0.5)\n", + "Requirement already satisfied: tornado>=6.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from notebook) (6.3.3)\n", + "Requirement already satisfied: jupyter-client>=7.4.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (8.3.1)\n", + "Requirement already satisfied: anyio>=3.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (4.0.0)\n", + "Requirement already satisfied: prometheus-client in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.17.1)\n", + "Requirement already satisfied: traitlets>=5.6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.9.0)\n", + "Requirement already satisfied: argon2-cffi in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (23.1.0)\n", + "Requirement already satisfied: nbconvert>=6.4.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (7.8.0)\n", + "Requirement already satisfied: nbformat>=5.3.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.9.2)\n", + "Requirement already satisfied: terminado>=0.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.17.1)\n", + 
"Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (23.1)\n", + "Requirement already satisfied: jinja2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (3.1.2)\n", + "Requirement already satisfied: pyzmq>=24 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (25.1.1)\n", + "Requirement already satisfied: jupyter-server-terminals in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.4.4)\n", + "Requirement already satisfied: websocket-client in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (1.6.2)\n", + "Requirement already satisfied: jupyter-events>=0.6.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (0.7.0)\n", + "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (5.3.1)\n", + "Requirement already satisfied: overrides in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (7.4.0)\n", + "Requirement already satisfied: send2trash>=1.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook) (1.8.2)\n", + "Requirement already satisfied: tomli in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.0.1)\n", + "Requirement already satisfied: async-lru>=1.0.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.0.4)\n", + "Requirement already satisfied: ipykernel in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (6.25.1)\n", + "Requirement already satisfied: jupyter-lsp>=2.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab<5,>=4.0.2->notebook) (2.2.0)\n", + "Requirement already satisfied: requests>=2.28 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (2.31.0)\n", + "Requirement already satisfied: babel>=2.10 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (2.12.1)\n", + "Requirement already satisfied: jsonschema>=4.17.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (4.19.0)\n", + "Requirement already satisfied: json5>=0.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook) (0.9.14)\n", + "Requirement already satisfied: idna>=2.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (3.4)\n", + "Requirement already satisfied: sniffio>=1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (1.3.0)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook) (1.1.3)\n", + "Requirement already satisfied: typing-extensions>=4.0.0 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from async-lru>=1.0.0->jupyterlab<5,>=4.0.2->notebook) (4.7.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jinja2->jupyter-server<3,>=2.4.0->notebook) (2.1.3)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (0.10.2)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (2023.7.1)\n", + "Requirement already satisfied: attrs>=22.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (23.1.0)\n", + "Requirement already satisfied: referencing>=0.28.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (0.30.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-client>=7.4.4->jupyter-server<3,>=2.4.0->notebook) (2.8.2)\n", + "Requirement already satisfied: platformdirs>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-core!=5.0.*,>=4.12->jupyter-server<3,>=2.4.0->notebook) (3.10.0)\n", + "Requirement already satisfied: pyyaml>=5.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (6.0.1)\n", + "Requirement already satisfied: rfc3986-validator>=0.1.1 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (0.1.1)\n", + "Requirement already satisfied: python-json-logger>=2.0.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (2.0.7)\n", + "Requirement already satisfied: rfc3339-validator in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook) (0.1.4)\n", + "Requirement already satisfied: bleach!=5.0.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (6.0.0)\n", + "Requirement already satisfied: defusedxml in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.7.1)\n", + "Requirement already satisfied: nbclient>=0.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.8.0)\n", + "Requirement already satisfied: pygments>=2.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (2.16.1)\n", + "Requirement already satisfied: tinycss2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.2.1)\n", + "Requirement already satisfied: beautifulsoup4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (4.12.2)\n", + "Requirement already satisfied: mistune<4,>=2.0.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (3.0.1)\n", + "Requirement already satisfied: jupyterlab-pygments in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.2.2)\n", + "Requirement already satisfied: pandocfilters>=1.4.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.5.0)\n", + "Requirement already satisfied: fastjsonschema in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from nbformat>=5.3.0->jupyter-server<3,>=2.4.0->notebook) (2.18.0)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (2.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (2023.7.22)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->jupyterlab-server<3,>=2.22.1->notebook) (3.2.0)\n", + "Requirement already satisfied: ptyprocess in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from terminado>=0.8.3->jupyter-server<3,>=2.4.0->notebook) (0.7.0)\n", + "Requirement already satisfied: argon2-cffi-bindings in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (21.2.0)\n", + "Requirement already satisfied: debugpy>=1.6.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.6.7.post1)\n", + "Requirement 
already satisfied: matplotlib-inline>=0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.6)\n", + "Requirement already satisfied: psutil in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (5.9.5)\n", + "Requirement already satisfied: ipython>=7.23.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (8.14.0)\n", + "Requirement already satisfied: appnope in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.3)\n", + "Requirement already satisfied: nest-asyncio in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.5.7)\n", + "Requirement already satisfied: comm>=0.1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.1.4)\n", + "Requirement already satisfied: six>=1.9.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (1.16.0)\n", + "Requirement already satisfied: webencodings in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (0.5.1)\n", + "Requirement already satisfied: decorator in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (5.1.1)\n", + "Requirement already satisfied: stack-data in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.6.2)\n", + "Requirement already satisfied: pickleshare in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.7.5)\n", + "Requirement already satisfied: pexpect>4.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (4.8.0)\n", + "Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (3.0.39)\n", + "Requirement already satisfied: backcall in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.0)\n", + "Requirement already satisfied: jedi>=0.16 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.19.0)\n", + "Requirement already satisfied: jsonpointer>1.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (2.4)\n", + "Requirement already satisfied: isoduration in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (20.11.0)\n", + "Requirement already satisfied: uri-template in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.3.0)\n", + "Requirement already satisfied: webcolors>=1.11 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from 
jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.13)\n", + "Requirement already satisfied: fqdn in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.5.1)\n", + "Requirement already satisfied: cffi>=1.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (1.15.1)\n", + "Requirement already satisfied: soupsieve>1.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from beautifulsoup4->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook) (2.4.1)\n", + "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook) (2.21)\n", + "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.8.3)\n", + "Requirement already satisfied: wcwidth in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.6)\n", + "Requirement already satisfied: arrow>=0.15.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from isoduration->jsonschema>=4.17.3->jupyterlab-server<3,>=2.22.1->notebook) (1.2.3)\n", + "Requirement already satisfied: executing>=1.2.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (1.2.0)\n", + "Requirement already satisfied: pure-eval in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (0.2.2)\n", + "Requirement already satisfied: asttokens>=2.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyterlab<5,>=4.0.2->notebook) (2.2.1)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: chromadb in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.4.8)\n", + "Requirement already satisfied: onnxruntime>=1.14.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.15.1)\n", + "Requirement already satisfied: tokenizers>=0.13.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.13.3)\n", + "Requirement already satisfied: fastapi<0.100.0,>=0.95.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.99.1)\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (4.7.1)\n", + "Requirement already satisfied: posthog>=2.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (3.0.2)\n", + "Requirement already satisfied: pypika>=0.48.9 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.48.9)\n", + "Requirement already satisfied: 
chroma-hnswlib==0.7.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.7.2)\n", + "Requirement already satisfied: requests>=2.28 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (2.31.0)\n", + "Requirement already satisfied: importlib-resources in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (6.0.1)\n", + "Requirement already satisfied: numpy>=1.21.6 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.25.2)\n", + "Requirement already satisfied: bcrypt>=4.0.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (4.0.1)\n", + "Requirement already satisfied: overrides>=7.3.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (7.4.0)\n", + "Requirement already satisfied: pulsar-client>=3.1.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (3.3.0)\n", + "Requirement already satisfied: uvicorn[standard]>=0.18.3 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (0.23.2)\n", + "Collecting tqdm>=4.65.0\n", + " Using cached tqdm-4.66.1-py3-none-any.whl (78 kB)\n", + "Requirement already satisfied: pydantic<2.0,>=1.9 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from chromadb) (1.10.12)\n", + "Requirement already satisfied: starlette<0.28.0,>=0.27.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from fastapi<0.100.0,>=0.95.2->chromadb) (0.27.0)\n", + "Requirement already satisfied: packaging in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (23.1)\n", + "Requirement already satisfied: 
coloredlogs in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (15.0.1)\n", + "Requirement already satisfied: protobuf in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (4.24.2)\n", + "Requirement already satisfied: flatbuffers in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (23.5.26)\n", + "Requirement already satisfied: sympy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (1.12)\n", + "Requirement already satisfied: six>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (1.16.0)\n", + "Requirement already satisfied: backoff>=1.10.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (2.2.1)\n", + "Requirement already satisfied: python-dateutil>2.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (2.8.2)\n", + "Requirement already satisfied: monotonic>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (1.6)\n", + "Requirement already satisfied: certifi in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pulsar-client>=3.1.0->chromadb) (2023.7.22)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.4)\n", + "Requirement already 
satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests>=2.28->chromadb) (2.0.4)\n", + "Requirement already satisfied: click>=7.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (8.1.7)\n", + "Requirement already satisfied: h11>=0.8 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.14.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (6.0.1)\n", + "Requirement already satisfied: watchfiles>=0.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.20.0)\n", + "Requirement already satisfied: httptools>=0.5.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.6.0)\n", + "Requirement already satisfied: python-dotenv>=0.13 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (1.0.0)\n", + "Requirement already satisfied: websockets>=10.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (11.0.3)\n", + "Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.17.0)\n", + "Requirement already satisfied: anyio<5,>=3.4.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (4.0.0)\n", + "Requirement already satisfied: humanfriendly>=9.1 in 
/Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from coloredlogs->onnxruntime>=1.14.1->chromadb) (10.0)\n", + "Requirement already satisfied: mpmath>=0.19 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from sympy->onnxruntime>=1.14.1->chromadb) (1.3.0)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (1.1.3)\n", + "Requirement already satisfied: sniffio>=1.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (1.3.0)\n", + "Installing collected packages: tqdm\n", + " Attempting uninstall: tqdm\n", + " Found existing installation: tqdm 4.64.1\n", + " Uninstalling tqdm-4.64.1:\n", + " Successfully uninstalled tqdm-4.64.1\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "clarifai 9.7.6 requires tqdm==4.64.1, but you have tqdm 4.66.1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed tqdm-4.66.1\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: pandas in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (2.1.0)\n", + "Requirement already satisfied: numpy>=1.22.4 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (1.25.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (2.8.2)\n", + "Requirement already satisfied: tzdata>=2022.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (2023.3)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from pandas) (2023.3)\n", + "Requirement already satisfied: six>=1.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", 
+ "Requirement already satisfied: rank_bm25 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (0.2.2)\n", + "Requirement already satisfied: numpy in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from rank_bm25) (1.25.2)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Collecting weaviate-client\n", + " Downloading weaviate_client-3.24.1-py3-none-any.whl (107 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m108.0/108.0 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting validators<1.0.0,>=0.21.2\n", + " Using cached validators-0.22.0-py3-none-any.whl (26 kB)\n", + "Collecting authlib<2.0.0,>=1.2.1\n", + " Downloading Authlib-1.2.1-py2.py3-none-any.whl (215 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m215.3/215.3 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: requests<3.0.0,>=2.30.0 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from weaviate-client) (2.31.0)\n", + "Requirement already satisfied: cryptography>=3.2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from authlib<2.0.0,>=1.2.1->weaviate-client) (41.0.3)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests<3.0.0,>=2.30.0->weaviate-client) 
(2.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests<3.0.0,>=2.30.0->weaviate-client) (2023.7.22)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests<3.0.0,>=2.30.0->weaviate-client) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from requests<3.0.0,>=2.30.0->weaviate-client) (3.4)\n", + "Requirement already satisfied: cffi>=1.12 in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cryptography>=3.2->authlib<2.0.0,>=1.2.1->weaviate-client) (1.15.1)\n", + "Requirement already satisfied: pycparser in /Users/sagarshah/.pyenv/versions/3.10.7/envs/c4-chatbot/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=3.2->authlib<2.0.0,>=1.2.1->weaviate-client) (2.21)\n", + "Installing collected packages: validators, authlib, weaviate-client\n", + "Successfully installed authlib-1.2.1 validators-0.22.0 weaviate-client-3.24.1\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "# Install all the third-party packages\n", + "\n", + "!pip install 'langchain[llms]'\n", + "!pip install Scrapy\n", + "!pip install html2text\n", + "!pip install lxml\n", + "!pip install python-dotenv\n", + "!pip install \"unstructured[all-docs]\"\n", + "!pip install tiktoken\n", + "!pip install faiss-cpu \n", + "!pip install GitPython\n", + "!pip install notebook\n", 
+ "!pip install chromadb\n", + "!pip install pandas\n", + "!pip install rank_bm25\n", + "!pip install weaviate-client\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# General setup - you can specify OPENAI_API_KEY in .env file\n", + "\n", + "import logging\n", + "from dotenv import load_dotenv\n", + "from IPython.display import display, Markdown, Latex\n", + "\n", + "logging.getLogger().setLevel(logging.INFO)\n", + "load_dotenv()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') or getpass.getpass('Enter your OpenAI API key: ')\n", + "\n", + "assert OPENAI_API_KEY, \"Please set OPENAI_API_KEY in your environment variables\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Paths to the data\n", + "\n", + "C4_WEBSITE_STORAGE_DIR = \"../knowledge_base/c4/website\"\n", + "C4_DOCS_STORAGE_DIR = \"../knowledge_base/c4/docs\"\n", + "C4_GH_DOCS_STORAGE_DIR = \"../knowledge_base/c4/gh_docs\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Crawling and Scraping using Scrapy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import scrapy\n", + "import html2text\n", + "import lxml.html\n", + "import json\n", + "from urllib.parse import urlparse\n", + "\n", + "class GenericSpider(scrapy.Spider):\n", + " name = 'generic'\n", + "\n", + " def __init__(self, domain='', storage_dir='.', *args, **kwargs):\n", + " super(GenericSpider, self).__init__(*args, **kwargs)\n", + " self.allowed_domains = [domain]\n", + " self.start_urls = 
[f'http://{domain}/']\n", + " self.storage_dir = storage_dir\n", + " \n", + " def parse(self, response):\n", + " # Remove unwanted elements using lxml\n", + " tree = lxml.html.fromstring(response.text)\n", + " \n", + " # Remove non-text related tags\n", + " for unwanted in tree.xpath('//script|//img|//video|//audio|//iframe|//object|//embed|//canvas|//svg|//link|//source|//track|//map|//area'):\n", + " unwanted.drop_tree()\n", + "\n", + " cleaned_html = lxml.html.tostring(tree).decode('utf-8')\n", + "\n", + " # Convert HTML to Markdown\n", + " converter = html2text.HTML2Text()\n", + " markdown_text = converter.handle(cleaned_html)\n", + "\n", + " # Save to a markdown file in the specified directory\n", + " if not os.path.exists(self.storage_dir):\n", + " os.makedirs(self.storage_dir)\n", + "\n", + " url = response.url\n", + " page_name = response.url.split(\"/\")[-1] if response.url.split(\"/\")[-1] else \"index\"\n", + "\n", + " filename = os.path.join(self.storage_dir, f'{page_name}.json')\n", + "\n", + " with open(filename, 'w') as f:\n", + " # Store the URL and markdown text in JSON format\n", + " json.dump({'url': url, 'md_content': markdown_text}, f)\n", + "\n", + " # Recursively follow relative links to other pages on the same domain\n", + " for href in response.css('a::attr(href)').getall():\n", + " url = response.urljoin(href)\n", + " if urlparse(url).netloc in self.allowed_domains:\n", + " yield scrapy.Request(url, self.parse)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "NOTE: Data has already been scraped and saved locally as JSON files in the 'knowledge_base/c4' directory. To re-run the scraping, uncomment the code in the cell below.\n", + "\n", + "On re-running the crawler, if you get 'ReactorNotRestartable' error, the notebook kernel would need to be restarted." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# from scrapy.crawler import CrawlerRunner\n", + "# from scrapy.utils.project import get_project_settings\n", + "# from twisted.internet import reactor\n", + "\n", + "# settings = get_project_settings()\n", + "\n", + "# runner = CrawlerRunner(settings)\n", + "# runner.crawl(GenericSpider, domain=\"code4rena.com\", storage_dir=C4_WEBSITE_STORAGE_DIR)\n", + "# runner.crawl(GenericSpider, domain=\"docs.code4rena.com\", storage_dir=C4_DOCS_STORAGE_DIR)\n", + "# d = runner.join()\n", + "# d.addBoth(lambda _: reactor.stop())\n", + "# reactor.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Get docs from Github Repo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# from git import Repo\n", + "\n", + "# repo = Repo.clone_from(\n", + "# \"https://github.com/code-423n4/docs\", to_path=C4_GH_DOCS_STORAGE_DIR\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieval Augmented Generation using LangChain" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Load locally saved scraped data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "from langchain.document_loaders import DirectoryLoader\n", + "from langchain.document_loaders import TextLoader\n", + "\n", + "def load_json_files(dir):\n", + " loader = DirectoryLoader(dir, loader_cls=TextLoader)\n", + " documents = loader.load()\n", + " for d in documents:\n", + " page_content_dict = json.loads(d.page_content)\n", + " d.page_content = page_content_dict['md_content']\n", + " d.metadata['url'] = page_content_dict['url']\n", + " return documents\n", + "\n", + "c4_website_data_list = load_json_files(C4_WEBSITE_STORAGE_DIR)\n", + "c4_docs_data_list = 
load_json_files(C4_DOCS_STORAGE_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders import DirectoryLoader\n", + "from langchain.document_loaders import TextLoader\n", + "\n", + "loader = DirectoryLoader(C4_GH_DOCS_STORAGE_DIR, loader_cls=TextLoader)\n", + "c4_gh_docs_data_list = loader.load()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Split the markdown content into semantic chunks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.text_splitter import (\n", + " RecursiveCharacterTextSplitter,\n", + " Language,\n", + ")\n", + "\n", + "md_splitter = RecursiveCharacterTextSplitter.from_language(\n", + " language=Language.MARKDOWN, chunk_size=2000, chunk_overlap=200\n", + ")\n", + "\n", + "\n", + "website_chunks = md_splitter.split_documents(c4_website_data_list)\n", + "docs_chunks = md_splitter.split_documents(c4_docs_data_list)\n", + "gh_docs_chunks = md_splitter.split_documents(c4_gh_docs_data_list)\n", + "\n", + "print(len(website_chunks))\n", + "print(len(docs_chunks))\n", + "print(len(gh_docs_chunks))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Embed the semantic chunks and store in an in-memory vector db" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.vectorstores import FAISS\n", + "from langchain.embeddings import OpenAIEmbeddings\n", + "from langchain.vectorstores import Chroma\n", + "\n", + "# NOTE: At times, OpenAI Embedding service can fail intermittently and return erroneous values such as [NaN], more info: https://github.com/langchain-ai/langchain/pull/7070\n", + "\n", + "embeddings = OpenAIEmbeddings()\n", + "Chroma(\"vectorstore\").delete_collection()\n", + "vectorstore = Chroma(\"vectorstore\", embeddings, 
collection_metadata={\"hnsw:space\": \"cosine\"})\n", + "\n", + "vectorstore.add_documents(website_chunks)\n", + "#vectorstore.add_documents(docs_chunks)\n", + "vectorstore.add_documents(gh_docs_chunks)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Retrieval Augmented Generation\n", + "Workflow \n", + "1. Use faster LLM (GPT-3.5) to generate 3 rephrased variants of the original user question to improve question quality which in-turn should improve retrieval\n", + "2. Use the rephrased question to generate the final answer using RAG" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Generate rephrased questions\n", + "Use faster LLM (GPT-3.5) to generate 3 rephrased variants of the original user question to improve question quality which in-turn should improve retrieval" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import LLMChain\n", + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.prompts import PromptTemplate\n", + "\n", + "prompt_template = \"\"\"You are a teacher who is helping a student ask the right questions about a service so that they can look in the most relevant places to find the answer. 
\n", + "# INSTRUCTIONS\n", + "- You are given student's question below\n", + "- Using the original question, generate 3 alternative questions that are rephrased to be not vague or ambiguous so as to clearly convey the same meaning and context as the original question\n", + "- Return the final result as a JSON object containing a list of rephrased questions as \"new_questions\" field\n", + "\n", + "# QUESTION\n", + "{question}\n", + "\n", + "# RESULT\n", + "\"\"\"\n", + "\n", + "\n", + "def generate_rephrased_questions(question):\n", + " chat = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", + " llm_chain = LLMChain(llm=chat, prompt=PromptTemplate.from_template(prompt_template))\n", + "\n", + " result = llm_chain(inputs={\"question\": question}, return_only_outputs=True)\n", + " result_dict = json.loads(result['text'])\n", + " new_questions = result_dict['new_questions']\n", + " return new_questions\n", + "\n", + "generate_rephrased_questions(\"What are scout awards?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Generate final answer using RAG" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def display_result(question, result):\n", + " display(Markdown(f\"### Question\"))\n", + " display(Markdown(\"ORIGINAL: \" + question))\n", + " display(Markdown(\"REPHRASED: \" + f\"{result['rephrased_question'] if result['rephrased_question'] else 'None'}\"))\n", + "\n", + " display(Markdown(f\"### Answer\"))\n", + " display(Markdown(result[\"result\"]))\n", + "\n", + " display(Markdown(f\"### Sources\"))\n", + " sources = [r.metadata['url'] if 'url' in r.metadata else r.metadata['source'] for r in result[\"source_documents\"] ]\n", + " print(\", \".join(sources))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import RetrievalQA\n", + "from langchain.chat_models import 
ChatOpenAI\n", + "\n", + "qa = RetrievalQA.from_chain_type(llm=ChatOpenAI(model_name=\"gpt-4\", temperature=0), chain_type=\"stuff\", retriever=vectorstore.as_retriever(), return_source_documents=True)\n", + "\n", + "\n", + "def call_llm(question, use_rephrased_questions=True):\n", + " if not use_rephrased_questions:\n", + " result = qa({\"query\": question})\n", + " result['rephrased_question'] = None\n", + " return result\n", + "\n", + "\n", + " # Get rephrased questions\n", + " rephrased_questions = generate_rephrased_questions(question)\n", + "\n", + " # Attempt each question until a valid result is found\n", + " for q in rephrased_questions:\n", + " result = qa({\"query\": q})\n", + " answer = result['result']\n", + " result['rephrased_question'] = None\n", + " \n", + " # If the model is unable to find an answer, it returns 'sorry' in the response, we try again with a different question\n", + " if 'sorry' in answer.lower():\n", + " continue\n", + " else:\n", + " result['rephrased_question'] = q\n", + " break\n", + "\n", + " return result\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### AutoEvaluator\n", + "Using LangChain's [AutoEvaluator technique](https://autoevaluator.langchain.com/) to evaluate the bot's performance on the dataset of C4 questions correctly answered by Mava as per team feedback\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import yaml\n", + "\n", + "# load yaml file\n", + "with open('knowledge_base/c4/c4_mava_correct_ans_set.yaml') as file:\n", + " # The FullLoader parameter handles the conversion from YAML\n", + " # scalar values to Python the dictionary format\n", + " yaml_data = yaml.load(file, Loader=yaml.FullLoader)\n", + "\n", + "mava_questions = [d['question'] for d in yaml_data]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.prompts import 
PromptTemplate\n", + "\n", + "template = \"\"\" \n", + " You are a grader trying to determine if a set of retrieved documents will help a student answer a question. \\n\n", + "\n", + " Here is the question: \\n\n", + " {query}\n", + "\n", + " Here are the documents retrieved to answer question: \\n\n", + " {result}\n", + " \n", + " Here is the correct answer to the question: \\n \n", + " {answer}\n", + " \n", + " Criteria: \n", + " relevance: Do all of the documents contain information that will help the student arrive that the correct answer to the question?\"\n", + "\n", + " Your response should be as follows:\n", + "\n", + " GRADE: (Correct or Incorrect, depending if all of the documents retrieved meet the criterion)\n", + " (line break)\n", + " JUSTIFICATION: (Write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Use three sentences maximum. Keep the answer as concise as possible.)\n", + " \"\"\"\n", + "\n", + "GRADE_DOCS_PROMPT = PromptTemplate(input_variables=['result', 'answer', 'query'], template=template)\n", + "\n", + "template = \"\"\"You are a teacher grading a quiz. \n", + "You are given a question, the student's answer, and the true answer, and are asked to score the student answer as either Correct or Incorrect.\n", + "\n", + "Example Format:\n", + "QUESTION: question here\n", + "STUDENT ANSWER: student's answer here\n", + "TRUE ANSWER: true answer here\n", + "GRADE: Correct or Incorrect here\n", + "\n", + "Grade the student answers based ONLY on their factual accuracy. Ignore differences in punctuation and phrasing between the student answer and true answer. It is OK if the student answer contains more information than the true answer, as long as it does not contain any conflicting statements. If the student answers that there is no specific information provided in the context, then the answer is Incorrect. Begin! 
\n", + "\n", + "QUESTION: {query}\n", + "STUDENT ANSWER: {result}\n", + "TRUE ANSWER: {answer}\n", + "GRADE:\n", + "\n", + "Your response should be as follows:\n", + "\n", + "GRADE: (Correct or Incorrect)\n", + "(line break)\n", + "JUSTIFICATION: (Without mentioning the student/teacher framing of this prompt, explain why the STUDENT ANSWER is Correct or Incorrect. Use one or two sentences maximum. Keep the answer as concise as possible.)\n", + "\"\"\"\n", + "\n", + "GRADE_ANSWER_PROMPT = PromptTemplate(input_variables=[\"query\", \"result\", \"answer\"], template=template)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.evaluation.qa import QAEvalChain\n", + "\n", + "def grade_model_answer(predicted_dataset, predictions):\n", + "\n", + " # Create an evaluation chain\n", + " eval_chain = QAEvalChain.from_llm(\n", + " llm=ChatOpenAI(model_name=\"gpt-4\", temperature=0),\n", + " prompt=GRADE_ANSWER_PROMPT\n", + " )\n", + "\n", + " # Evaluate the predictions and ground truth using the evaluation chain\n", + " graded_outputs = eval_chain.evaluate(\n", + " predicted_dataset,\n", + " predictions,\n", + " question_key=\"question\",\n", + " prediction_key=\"result\"\n", + " )\n", + "\n", + " return graded_outputs\n", + "\n", + "\n", + "def grade_model_retrieval(gt_dataset, predictions):\n", + " # Create an evaluation chain\n", + " eval_chain = QAEvalChain.from_llm(\n", + " llm=ChatOpenAI(model_name=\"gpt-4\", temperature=0),\n", + " prompt=GRADE_DOCS_PROMPT\n", + " )\n", + "\n", + " # Evaluate the predictions and ground truth using the evaluation chain\n", + " graded_outputs = eval_chain.evaluate(\n", + " gt_dataset,\n", + " predictions,\n", + " question_key=\"question\",\n", + " prediction_key=\"result\"\n", + " )\n", + " return graded_outputs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bot_answers = []\n", + 
"source_docs = []\n", + "for d in yaml_data:\n", + " result = call_llm(d['question'])\n", + " bot_answers.append(result['result'])\n", + " source_docs.append(result['source_documents'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "predictions = [{'result': a} for a in bot_answers]\n", + "\n", + "answer_grades = grade_model_answer(yaml_data, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "retrieved_docs = []\n", + "for i, d in enumerate(yaml_data):\n", + " retrieved_doc_text = \"\"\n", + " for j, doc in enumerate(source_docs[i]):\n", + " retrieved_doc_text += \"Doc %s: \" % str(j + 1) + doc.page_content + \" \"\n", + " retrieved = {\"question\": d[\"question\"], \"answer\": d[\"answer\"], \"result\": retrieved_doc_text}\n", + " retrieved_docs.append(retrieved)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "retrieval_grades = grade_model_retrieval(yaml_data, retrieved_docs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.DataFrame({\n", + " \"question\": [d['question'] for d in yaml_data],\n", + " \"Mava correct answer (True value)\": [d['answer'] for d in yaml_data],\n", + " \"Bot answers\": [p['result'] for p in predictions],\n", + " \"Retrieval relevancy score\": ['Incorrect' if 'Incorrect' in g['results'] else 'Correct' for g in retrieval_grades],\n", + " \"Answer similarity score\": ['Incorrect' if 'Incorrect' in g['results'] else 'Correct' for g in answer_grades]\n", + "})\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### HyDE technique\n", + "This technique can help improve information retrieval\n", + "\n", + "https://python.langchain.com/docs/use_cases/question_answering/how_to/hyde" + ] + }, 
+ { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "collection_name = \"vectorstore_hyde\"\n", + "Chroma(collection_name).delete_collection()\n", + "\n", + "vectorstore_hyde = Chroma(collection_name, embeddings, collection_metadata={\"hnsw:space\": \"cosine\"})\n", + "vectorstore_hyde.add_documents(website_chunks)\n", + "vectorstore_hyde.add_documents(gh_docs_chunks)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.vectorstores.base import VectorStoreRetriever\n", + "from langchain.callbacks.manager import (\n", + " AsyncCallbackManagerForRetrieverRun,\n", + " CallbackManagerForRetrieverRun,\n", + ")\n", + "from langchain.docstore.document import Document\n", + "from typing import List\n", + "\n", + "class HydeRetriever(VectorStoreRetriever):\n", + "\n", + " def _get_relevant_documents(\n", + " self, query: str, *, run_manager: CallbackManagerForRetrieverRun\n", + " ) -> List[Document]:\n", + " llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", + "\n", + " web_search_template = \"\"\"Please write a passage to answer the question \n", + " Question: {QUESTION}\n", + " Passage:\"\"\"\n", + "\n", + " web_search = PromptTemplate(template=web_search_template, input_variables=[\"QUESTION\"])\n", + "\n", + " llm_chain = LLMChain(llm=llm, prompt=web_search)\n", + "\n", + " result = llm_chain(inputs={\"QUESTION\": query}, return_only_outputs=True)\n", + " hyquery = result['text']\n", + "\n", + " return super()._get_relevant_documents(hyquery, run_manager=run_manager)\n", + "\n", + "\n", + "hyde_retriever = HydeRetriever(vectorstore=vectorstore_hyde)\n", + "\n", + "hyde_retriever.get_relevant_documents(\"How can I access findings.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "qa = RetrievalQA.from_chain_type(llm=ChatOpenAI(model_name=\"gpt-4\", 
temperature=0), chain_type=\"stuff\", retriever=hyde_retriever, return_source_documents=True)\n", + "\n", + "\n", + "def call_hyde_llm(question):\n", + " result = qa({\"query\": question})\n", + " result['rephrased_question'] = None\n", + " return result\n", + "\n", + "def ask_hyde(question):\n", + " result = call_hyde_llm(question)\n", + " display_result(question, result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Vector Store with Sources" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.embeddings import OpenAIEmbeddings\n", + "from langchain.vectorstores import Chroma\n", + "\n", + "# NOTE: At times, OpenAI Embedding service can fail intermittently and return erroneous values such as [NaN], more info: https://github.com/langchain-ai/langchain/pull/7070\n", + "\n", + "embeddings = OpenAIEmbeddings()\n", + "collection_name = \"vectorstore_with_sources\"\n", + "Chroma(collection_name).delete_collection()\n", + "vectorstore_with_sources = Chroma(collection_name, embeddings, collection_metadata={\"hnsw:space\": \"cosine\"})\n", + "\n", + "for i, d in enumerate(website_chunks):\n", + " dd = d.copy(deep=True)\n", + " dd.metadata['source'] = f\"w{i}-pl\"\n", + " vectorstore_with_sources.add_documents([dd])\n", + "\n", + "for i, d in enumerate(gh_docs_chunks):\n", + " dd = d.copy(deep=True)\n", + " local_path = dd.metadata['source']\n", + " dd.metadata['source'] = f\"g{i}-pl\"\n", + " dd.metadata['url'] = f\"{local_path.replace(C4_GH_DOCS_STORAGE_DIR, 'https://github.com/code-423n4/docs/blob/main/')}\"\n", + " vectorstore_with_sources.add_documents([dd])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### MultiQuery approach" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chat_models import ChatOpenAI\n", + "from 
langchain.retrievers.multi_query import MultiQueryRetriever\n", + "\n", + "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", + "multiquery_retriever = MultiQueryRetriever.from_llm(\n", + " retriever=vectorstore_with_sources.as_retriever(), llm=llm\n", + ")\n", + "import logging\n", + "\n", + "logging.basicConfig()\n", + "logging.getLogger(\"langchain.retrievers.multi_query\").setLevel(logging.ERROR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.retrievers import BM25Retriever, EnsembleRetriever\n", + "\n", + "lowercased_website_chunks = []\n", + "for d in website_chunks:\n", + " dd = d.copy()\n", + " dd.page_content = d.page_content.lower()\n", + " lowercased_website_chunks.append(dd)\n", + "\n", + "\n", + "lowercased_gh_docs_chunks = []\n", + "for d in gh_docs_chunks:\n", + " dd = d.copy()\n", + " dd.page_content = d.page_content.lower()\n", + " lowercased_gh_docs_chunks.append(dd)\n", + "\n", + "bm25_retriever = BM25Retriever.from_documents(lowercased_website_chunks + lowercased_gh_docs_chunks)\n", + "bm25_retriever.k = 2\n", + "\n", + "ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, multiquery_retriever], weights=[0.5, 0.5])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Final Implementation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import RetrievalQAWithSourcesChain\n", + "from langchain.chat_models import ChatOpenAI\n", + "\n", + "\n", + "model = ChatOpenAI(model_name=\"gpt-4\", temperature=0)\n", + "\n", + "qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(model, chain_type=\"stuff\", retriever=ensemble_retriever, return_source_documents=True)\n", + "\n", + "\n", + "def run_qa_with_sources(question):\n", + " \n", + " # Sanitize the question by removing any trailing question marks\n", + " 
sanitized_question = question.rstrip(\"?\")\n", + "\n", + " result = qa_with_sources({\"question\": sanitized_question}, return_only_outputs=True)\n", + "\n", + " answer = result['answer']\n", + " source_ids = result['sources']\n", + " source_docs = result['source_documents']\n", + "\n", + " source_urls = set()\n", + " for d in source_docs:\n", + " metadata = d.metadata\n", + " source_id = metadata['source']\n", + " url = metadata['url']\n", + " if source_id in source_ids:\n", + " source_urls.add(url)\n", + " return dict(answer=answer, source_urls=source_urls, source_docs=source_docs)\n", + "\n", + "def ask(question):\n", + " result = run_qa_with_sources(question)\n", + "\n", + " display(Markdown(f\"### Question\"))\n", + " display(Markdown(\"ORIGINAL: \" + question))\n", + "\n", + " display(Markdown(f\"### Answer\"))\n", + " display(Markdown(result[\"answer\"]))\n", + "\n", + " display(Markdown(f\"### Sources\"))\n", + " print(\", \".join(result['source_urls']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def auto_eval():\n", + " bot_answers = []\n", + " source_docs = []\n", + " for d in yaml_data:\n", + " result = run_qa_with_sources(d['question'])\n", + " bot_answers.append(result['answer'])\n", + " source_docs.append(result['source_docs'])\n", + " \n", + " predictions = [{'result': a} for a in bot_answers]\n", + "\n", + " answer_grades = grade_model_answer(yaml_data, predictions)\n", + "\n", + " retrieved_docs = []\n", + " for i, d in enumerate(yaml_data):\n", + " retrieved_doc_text = \"\"\n", + " for j, doc in enumerate(source_docs[i]):\n", + " retrieved_doc_text += \"Doc %s: \" % str(j + 1) + doc.page_content + \" \"\n", + " retrieved = {\"question\": d[\"question\"], \"answer\": d[\"answer\"], \"result\": retrieved_doc_text}\n", + " retrieved_docs.append(retrieved)\n", + "\n", + " retrieval_grades = grade_model_retrieval(yaml_data, retrieved_docs)\n", + "\n", + " df = pd.DataFrame({\n", 
+ " \"question\": [d['question'] for d in yaml_data],\n", + " \"Mava correct answer (True value)\": [d['answer'] for d in yaml_data],\n", + " \"Bot answers\": [p['result'] for p in predictions],\n", + " \"Retrieval relevancy score\": ['Incorrect' if 'Incorrect' in g['results'] else 'Correct' for g in retrieval_grades],\n", + " \"Answer similarity score\": ['Incorrect' if 'Incorrect' in g['results'] else 'Correct' for g in answer_grades]\n", + " })\n", + " print(f\"Bot Accuracy: {df['Answer similarity score'].value_counts()['Correct'] / len(df['Answer similarity score'])}\")\n", + " \n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "auto_eval()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Questions that were answered incorrectly by the Mava bot as per emoji reaction in the test channel\n", + "MAVA_MISANSWERED_QUES = [\n", + " \"Am I allowed to use AI in an audit?\",\n", + " \"Can I change my Code4rena username?\",\n", + " \"How do I book a solo audit?\",\n", + " \"Do I need to be certified to participate in an audit?\",\n", + " \"How do bot races work?\",\n", + " \"Can I change my Code4rena profile name?\",\n", + " \"What are scout awards?\",\n", + " \"What are analysis reports?\",\n", + " \"what is an analysis finding?\",\n", + " \"My name wasn't in the award announcements. 
When can I check on my results?\",\n", + " \"How long does the certification process take?\",\n", + " \"How can I access findings.csv?\",\n", + " \"Can I use chatgpt?\"\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "labeled_ques = [d['question'] for d in yaml_data]\n", + "eval_set = labeled_ques + MAVA_MISANSWERED_QUES" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "eval_results = []\n", + "for q in eval_set:\n", + " result = run_qa_with_sources(q)\n", + " eval_results.append(result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame({\n", + " \"question\": [q for q in eval_set],\n", + " \"Bot answers\": [r['answer'] for r in eval_results],\n", + " \"Sources\": [ \", \".join(r['source_urls']) for r in eval_results],\n", + "})\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_excel(\"./outputs/eval_results.xlsx\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ask(\"My wallet was hacked. 
What do I do?\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "c4-chatbot", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.7" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/qa_bot/qa_bot.ipynb b/qa_bot/qa_bot.ipynb deleted file mode 100644 index e335a43..0000000 --- a/qa_bot/qa_bot.ipynb +++ /dev/null @@ -1,1020 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# CodeArena (C4) Question Answer bot\n", - "\n", - "### Objective\n", - "- This notebook has the PoC work for a Question Answer bot using C4's knowledge bases.\n", - "- The objective of the PoC is to prototype an LLM implementation that can accurately answer questions to their expectation and at the very least perform better than their current bot from [Mava](https://www.mava.app/)\n", - "\n", - "### Observations from the usage of Mava\n", - "- The platform offers Discord support management with ticketing and AI help bot features\n", - "- For the AI help bot, the user is able to specify links to multiple knowledge sources that can be used for answering questions.\n", - "- Based on C4's testing of the Mava bot in the private channel, the following stats were observed:-\n", - " - Total questions asked: 29\n", - " - Total questions mis-answered based on emoji reactions: 13\n", - " - Accuracy - ~55%\n", - "\n", - "### Knowledge Bases\n", - "Based on conversations with their team, the following knowledge bases were identified to be relevant and are the same ones that Mava is using:-\n", - "- [Main Website](https://code4rena.com/)\n", - "- 
[Docs](https://docs.code4rena.com/) \n", - "\n", - "\n", - "### High-level Approach\n", - "- Crawl and scrape C4’s website and docs using Scrapy lib\n", - "- Convert the html content to markdown format so that the model can better understand the context\n", - "- Use LangChain lib to do the following:-\n", - " - Split the markdown header-separated sections into semantic chunks\n", - " - Embed and store the semantic chunks in an in-memory vector db\n", - " - Use the retrieval augmented functionality to answer the question" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Install all the third-party packages\n", - "\n", - "!pip install 'langchain[llms]'\n", - "!pip install Scrapy\n", - "!pip install html2text\n", - "!pip install lxml\n", - "!pip install python-dotenv\n", - "!pip install \"unstructured[all-docs]\"\n", - "!pip install tiktoken\n", - "!pip install faiss-cpu \n", - "!pip install GitPython\n", - "!pip install notebook\n", - "!pip install chromadb\n", - "!pip install pandas\n", - "!pip install rank_bm25" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# General setup - you can specify OPENAI_API_KEY in .env file\n", - "\n", - "import logging\n", - "from dotenv import load_dotenv\n", - "from IPython.display import display, Markdown, Latex\n", - "\n", - "logging.getLogger().setLevel(logging.INFO)\n", - "load_dotenv()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import getpass\n", - "import os\n", - "\n", - "OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') or getpass.getpass('Enter your OpenAI API key: ')\n", - "\n", - "assert OPENAI_API_KEY, \"Please set OPENAI_API_KEY in your environment variables\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Paths to the data\n", - "\n", - 
"C4_WEBSITE_STORAGE_DIR = \"knowledge_base/c4/website\"\n", - "C4_DOCS_STORAGE_DIR = \"knowledge_base/c4/docs\"\n", - "C4_GH_DOCS_STORAGE_DIR = \"knowledge_base/c4/gh_docs\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Crawling and Scraping using Scrapy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import scrapy\n", - "import html2text\n", - "import lxml.html\n", - "import json\n", - "from urllib.parse import urlparse\n", - "\n", - "class GenericSpider(scrapy.Spider):\n", - " name = 'generic'\n", - "\n", - " def __init__(self, domain='', storage_dir='.', *args, **kwargs):\n", - " super(GenericSpider, self).__init__(*args, **kwargs)\n", - " self.allowed_domains = [domain]\n", - " self.start_urls = [f'http://{domain}/']\n", - " self.storage_dir = storage_dir\n", - " \n", - " def parse(self, response):\n", - " # Remove unwanted elements using lxml\n", - " tree = lxml.html.fromstring(response.text)\n", - " \n", - " # Remove non-text related tags\n", - " for unwanted in tree.xpath('//script|//img|//video|//audio|//iframe|//object|//embed|//canvas|//svg|//link|//source|//track|//map|//area'):\n", - " unwanted.drop_tree()\n", - "\n", - " cleaned_html = lxml.html.tostring(tree).decode('utf-8')\n", - "\n", - " # Convert HTML to Markdown\n", - " converter = html2text.HTML2Text()\n", - " markdown_text = converter.handle(cleaned_html)\n", - "\n", - " # Save to a markdown file in the specified directory\n", - " if not os.path.exists(self.storage_dir):\n", - " os.makedirs(self.storage_dir)\n", - "\n", - " url = response.url\n", - " page_name = response.url.split(\"/\")[-1] if response.url.split(\"/\")[-1] else \"index\"\n", - "\n", - " filename = os.path.join(self.storage_dir, f'{page_name}.json')\n", - "\n", - " with open(filename, 'w') as f:\n", - " # Store the URL and markdown text in JSON format\n", - " json.dump({'url': url, 'md_content': markdown_text}, 
f)\n", - "\n", - " # Recursively follow relative links to other pages on the same domain\n", - " for href in response.css('a::attr(href)').getall():\n", - " url = response.urljoin(href)\n", - " if urlparse(url).netloc in self.allowed_domains:\n", - " yield scrapy.Request(url, self.parse)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "NOTE: Data has already been scraped and saved locally as JSON files in the 'knowledge_base/c4' directory. To re-run the scraping, uncomment the code in the cell below.\n", - "\n", - "On re-running the crawler, if you get 'ReactorNotRestartable' error, the notebook kernel would need to be restarted." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# from scrapy.crawler import CrawlerRunner\n", - "# from scrapy.utils.project import get_project_settings\n", - "# from twisted.internet import reactor\n", - "\n", - "# settings = get_project_settings()\n", - "\n", - "# runner = CrawlerRunner(settings)\n", - "# runner.crawl(GenericSpider, domain=\"code4rena.com\", storage_dir=C4_WEBSITE_STORAGE_DIR)\n", - "# runner.crawl(GenericSpider, domain=\"docs.code4rena.com\", storage_dir=C4_DOCS_STORAGE_DIR)\n", - "# d = runner.join()\n", - "# d.addBoth(lambda _: reactor.stop())\n", - "# reactor.run()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Get docs from Github Repo" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# from git import Repo\n", - "\n", - "# repo = Repo.clone_from(\n", - "# \"https://github.com/code-423n4/docs\", to_path=C4_GH_DOCS_STORAGE_DIR\n", - "# )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Retrieval Augmented Generation using LangChain" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Load locally saved scraped data" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "from langchain.document_loaders import DirectoryLoader\n", - "from langchain.document_loaders import TextLoader\n", - "\n", - "def load_json_files(dir):\n", - " loader = DirectoryLoader(dir, loader_cls=TextLoader)\n", - " documents = loader.load()\n", - " for d in documents:\n", - " page_content_dict = json.loads(d.page_content)\n", - " d.page_content = page_content_dict['md_content']\n", - " d.metadata['url'] = page_content_dict['url']\n", - " return documents\n", - "\n", - "c4_website_data_list = load_json_files(C4_WEBSITE_STORAGE_DIR)\n", - "c4_docs_data_list = load_json_files(C4_DOCS_STORAGE_DIR)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.document_loaders import DirectoryLoader\n", - "from langchain.document_loaders import TextLoader\n", - "\n", - "loader = DirectoryLoader(C4_GH_DOCS_STORAGE_DIR, loader_cls=TextLoader)\n", - "c4_gh_docs_data_list = loader.load()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Split the markdown content into semantic chunks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.text_splitter import (\n", - " RecursiveCharacterTextSplitter,\n", - " Language,\n", - ")\n", - "\n", - "md_splitter = RecursiveCharacterTextSplitter.from_language(\n", - " language=Language.MARKDOWN, chunk_size=2000, chunk_overlap=200\n", - ")\n", - "\n", - "\n", - "website_chunks = md_splitter.split_documents(c4_website_data_list)\n", - "docs_chunks = md_splitter.split_documents(c4_docs_data_list)\n", - "gh_docs_chunks = md_splitter.split_documents(c4_gh_docs_data_list)\n", - "\n", - "print(len(website_chunks))\n", - "print(len(docs_chunks))\n", - "print(len(gh_docs_chunks))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Embed the 
semantic chunks and store in an in-memory vector db" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.vectorstores import FAISS\n", - "from langchain.embeddings import OpenAIEmbeddings\n", - "from langchain.vectorstores import Chroma\n", - "\n", - "# NOTE: At times, OpenAI Embedding service can fail intermittently and return errorneous values such as [NaN], more info: https://github.com/langchain-ai/langchain/pull/7070\n", - "\n", - "embeddings = OpenAIEmbeddings()\n", - "Chroma(\"vectorstore\").delete_collection()\n", - "vectorstore = Chroma(\"vectorstore\", embeddings, collection_metadata={\"hnsw:space\": \"cosine\"})\n", - "\n", - "vectorstore.add_documents(website_chunks)\n", - "#vectorstore.add_documents(docs_chunks)\n", - "vectorstore.add_documents(gh_docs_chunks)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Retrieval Augmented Generation\n", - "Workflow \n", - "1. Use faster LLM (GPT-3.5) to generate 3 rephrased variants of the original user question to improve question quality which in-turn should improve retrieval\n", - "2. Use the rephrased question to generate the final answer using RAG" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### Generate rephrased questions\n", - "Use faster LLM (GPT-3.5) to generate 3 rephrased variants of the original user question to improve question quality which in-turn should improve retrieval" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.chains import LLMChain\n", - "from langchain.chat_models import ChatOpenAI\n", - "from langchain.prompts import PromptTemplate\n", - "\n", - "prompt_template = \"\"\"You are a teacher who is helping a student ask the right questions about a service so that they can look in the most relevant places to find the answer. 
\n", - "# INSTRUCTIONS\n", - "- You are given student's question below\n", - "- Using the original question, generate 3 alternative questions that are rephrased to be not vague or ambiguous so as to clearly convey the same meaning and context as the original question\n", - "- Return the final result as a JSON object containing a list of rephrased questions as \"new_questions\" field\n", - "\n", - "# QUESTION\n", - "{question}\n", - "\n", - "# RESULT\n", - "\"\"\"\n", - "\n", - "\n", - "def generate_rephrased_questions(question):\n", - " chat = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", - " llm_chain = LLMChain(llm=chat, prompt=PromptTemplate.from_template(prompt_template))\n", - "\n", - " result = llm_chain(inputs={\"question\": question}, return_only_outputs=True)\n", - " result_dict = json.loads(result['text'])\n", - " new_questions = result_dict['new_questions']\n", - " return new_questions\n", - "\n", - "generate_rephrased_questions(\"What are scout awards?\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### Generate final answer using RAG" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def display_result(question, result):\n", - " display(Markdown(f\"### Question\"))\n", - " display(Markdown(\"ORIGINAL: \" + question))\n", - " display(Markdown(\"REPHRASED: \" + f\"{result['rephrased_question'] if result['rephrased_question'] else 'None'}\"))\n", - "\n", - " display(Markdown(f\"### Answer\"))\n", - " display(Markdown(result[\"result\"]))\n", - "\n", - " display(Markdown(f\"### Sources\"))\n", - " sources = [r.metadata['url'] if 'url' in r.metadata else r.metadata['source'] for r in result[\"source_documents\"] ]\n", - " print(\", \".join(sources))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.chains import RetrievalQA\n", - "from langchain.chat_models import 
ChatOpenAI\n", - "\n", - "qa = RetrievalQA.from_chain_type(llm=ChatOpenAI(model_name=\"gpt-4\", temperature=0), chain_type=\"stuff\", retriever=vectorstore.as_retriever(), return_source_documents=True)\n", - "\n", - "\n", - "def call_llm(question, use_rephrased_questions=True):\n", - " if not use_rephrased_questions:\n", - " result = qa({\"query\": question})\n", - " result['rephrased_question'] = None\n", - " return result\n", - "\n", - "\n", - " # Get rephrased questions\n", - " rephrased_questions = generate_rephrased_questions(question)\n", - "\n", - " # Attempt each question until a valid result is found\n", - " for q in rephrased_questions:\n", - " result = qa({\"query\": q})\n", - " answer = result['result']\n", - " result['rephrased_question'] = None\n", - " \n", - " # If the model is unable to find an answer, it returns 'sorry' in the response, we try again with a different question\n", - " if 'sorry' in answer.lower():\n", - " continue\n", - " else:\n", - " result['rephrased_question'] = q\n", - " break\n", - "\n", - " return result\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### AutoEvaluator\n", - "Using LangChain's [AutoEvaluator technique](https://autoevaluator.langchain.com/) to evaluate the bot's performance on the dataset of C4 questions correctly answered by Mava as per team feedback\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import yaml\n", - "\n", - "# load yaml file\n", - "with open('knowledge_base/c4/c4_mava_correct_ans_set.yaml') as file:\n", - " # The FullLoader parameter handles the conversion from YAML\n", - " # scalar values to Python the dictionary format\n", - " yaml_data = yaml.load(file, Loader=yaml.FullLoader)\n", - "\n", - "mava_questions = [d['question'] for d in yaml_data]\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.prompts import 
PromptTemplate\n", - "\n", - "template = \"\"\" \n", - " You are a grader trying to determine if a set of retrieved documents will help a student answer a question. \\n\n", - "\n", - " Here is the question: \\n\n", - " {query}\n", - "\n", - " Here are the documents retrieved to answer question: \\n\n", - " {result}\n", - " \n", - " Here is the correct answer to the question: \\n \n", - " {answer}\n", - " \n", - " Criteria: \n", - " relevance: Do all of the documents contain information that will help the student arrive that the correct answer to the question?\"\n", - "\n", - " Your response should be as follows:\n", - "\n", - " GRADE: (Correct or Incorrect, depending if all of the documents retrieved meet the criterion)\n", - " (line break)\n", - " JUSTIFICATION: (Write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Use three sentences maximum. Keep the answer as concise as possible.)\n", - " \"\"\"\n", - "\n", - "GRADE_DOCS_PROMPT = PromptTemplate(input_variables=['result', 'answer', 'query'], template=template)\n", - "\n", - "template = \"\"\"You are a teacher grading a quiz. \n", - "You are given a question, the student's answer, and the true answer, and are asked to score the student answer as either Correct or Incorrect.\n", - "\n", - "Example Format:\n", - "QUESTION: question here\n", - "STUDENT ANSWER: student's answer here\n", - "TRUE ANSWER: true answer here\n", - "GRADE: Correct or Incorrect here\n", - "\n", - "Grade the student answers based ONLY on their factual accuracy. Ignore differences in punctuation and phrasing between the student answer and true answer. It is OK if the student answer contains more information than the true answer, as long as it does not contain any conflicting statements. If the student answers that there is no specific information provided in the context, then the answer is Incorrect. Begin! 
\n", - "\n", - "QUESTION: {query}\n", - "STUDENT ANSWER: {result}\n", - "TRUE ANSWER: {answer}\n", - "GRADE:\n", - "\n", - "Your response should be as follows:\n", - "\n", - "GRADE: (Correct or Incorrect)\n", - "(line break)\n", - "JUSTIFICATION: (Without mentioning the student/teacher framing of this prompt, explain why the STUDENT ANSWER is Correct or Incorrect. Use one or two sentences maximum. Keep the answer as concise as possible.)\n", - "\"\"\"\n", - "\n", - "GRADE_ANSWER_PROMPT = PromptTemplate(input_variables=[\"query\", \"result\", \"answer\"], template=template)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.evaluation.qa import QAEvalChain\n", - "\n", - "def grade_model_answer(predicted_dataset, predictions):\n", - "\n", - " # Create an evaluation chain\n", - " eval_chain = QAEvalChain.from_llm(\n", - " llm=ChatOpenAI(model_name=\"gpt-4\", temperature=0),\n", - " prompt=GRADE_ANSWER_PROMPT\n", - " )\n", - "\n", - " # Evaluate the predictions and ground truth using the evaluation chain\n", - " graded_outputs = eval_chain.evaluate(\n", - " predicted_dataset,\n", - " predictions,\n", - " question_key=\"question\",\n", - " prediction_key=\"result\"\n", - " )\n", - "\n", - " return graded_outputs\n", - "\n", - "\n", - "def grade_model_retrieval(gt_dataset, predictions):\n", - " # Create an evaluation chain\n", - " eval_chain = QAEvalChain.from_llm(\n", - " llm=ChatOpenAI(model_name=\"gpt-4\", temperature=0),\n", - " prompt=GRADE_DOCS_PROMPT\n", - " )\n", - "\n", - " # Evaluate the predictions and ground truth using the evaluation chain\n", - " graded_outputs = eval_chain.evaluate(\n", - " gt_dataset,\n", - " predictions,\n", - " question_key=\"question\",\n", - " prediction_key=\"result\"\n", - " )\n", - " return graded_outputs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bot_answers = []\n", - 
"source_docs = []\n", - "for d in yaml_data:\n", - " result = call_llm(d['question'])\n", - " bot_answers.append(result['result'])\n", - " source_docs.append(result['source_documents'])\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "predictions = [{'result': a} for a in bot_answers]\n", - "\n", - "answer_grades = grade_model_answer(yaml_data, predictions)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "retrieved_docs = []\n", - "for i, d in enumerate(yaml_data):\n", - " retrieved_doc_text = \"\"\n", - " for j, doc in enumerate(source_docs[i]):\n", - " retrieved_doc_text += \"Doc %s: \" % str(j + 1) + doc.page_content + \" \"\n", - " retrieved = {\"question\": d[\"question\"], \"answer\": d[\"answer\"], \"result\": retrieved_doc_text}\n", - " retrieved_docs.append(retrieved)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "retrieval_grades = grade_model_retrieval(yaml_data, retrieved_docs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "df = pd.DataFrame({\n", - " \"question\": [d['question'] for d in yaml_data],\n", - " \"Mava correct answer (True value)\": [d['answer'] for d in yaml_data],\n", - " \"Bot answers\": [p['result'] for p in predictions],\n", - " \"Retrieval relevancy score\": ['Incorrect' if 'Incorrect' in g['results'] else 'Correct' for g in retrieval_grades],\n", - " \"Answer similarity score\": ['Incorrect' if 'Incorrect' in g['results'] else 'Correct' for g in answer_grades]\n", - "})\n", - "df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### HyDE technique\n", - "This technique can help improve information retrieval\n", - "\n", - "https://python.langchain.com/docs/use_cases/question_answering/how_to/hyde" - ] - }, 
- { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "collection_name = \"vectorstore_hyde\"\n", - "Chroma(collection_name).delete_collection()\n", - "\n", - "vectorstore_hyde = Chroma(collection_name, embeddings, collection_metadata={\"hnsw:space\": \"cosine\"})\n", - "vectorstore_hyde.add_documents(website_chunks)\n", - "vectorstore_hyde.add_documents(gh_docs_chunks)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.vectorstores.base import VectorStoreRetriever\n", - "from langchain.callbacks.manager import (\n", - " AsyncCallbackManagerForRetrieverRun,\n", - " CallbackManagerForRetrieverRun,\n", - ")\n", - "from langchain.docstore.document import Document\n", - "from typing import List\n", - "\n", - "class HydeRetriever(VectorStoreRetriever):\n", - "\n", - " def _get_relevant_documents(\n", - " self, query: str, *, run_manager: CallbackManagerForRetrieverRun\n", - " ) -> List[Document]:\n", - " llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", - "\n", - " web_search_template = \"\"\"Please write a passage to answer the question \n", - " Question: {QUESTION}\n", - " Passage:\"\"\"\n", - "\n", - " web_search = PromptTemplate(template=web_search_template, input_variables=[\"QUESTION\"])\n", - "\n", - " llm_chain = LLMChain(llm=llm, prompt=web_search)\n", - "\n", - " result = llm_chain(inputs={\"QUESTION\": query}, return_only_outputs=True)\n", - " hyquery = result['text']\n", - "\n", - " return super()._get_relevant_documents(hyquery, run_manager=run_manager)\n", - "\n", - "\n", - "hyde_retriever = HydeRetriever(vectorstore=vectorstore_hyde)\n", - "\n", - "hyde_retriever.get_relevant_documents(\"How can I access findings.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "qa = RetrievalQA.from_chain_type(llm=ChatOpenAI(model_name=\"gpt-4\", 
temperature=0), chain_type=\"stuff\", retriever=hyde_retriever, return_source_documents=True)\n", - "\n", - "\n", - "def call_hyde_llm(question):\n", - " result = qa({\"query\": question})\n", - " result['rephrased_question'] = None\n", - " return result\n", - "\n", - "def ask_hyde(question):\n", - " result = call_hyde_llm(question)\n", - " display_result(question, result)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Vector Store with Sources" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.embeddings import OpenAIEmbeddings\n", - "from langchain.vectorstores import Chroma\n", - "\n", - "# NOTE: At times, OpenAI Embedding service can fail intermittently and return errorneous values such as [NaN], more info: https://github.com/langchain-ai/langchain/pull/7070\n", - "\n", - "embeddings = OpenAIEmbeddings()\n", - "collection_name = \"vectorstore_with_sources\"\n", - "Chroma(collection_name).delete_collection()\n", - "vectorstore_with_sources = Chroma(collection_name, embeddings, collection_metadata={\"hnsw:space\": \"cosine\"})\n", - "\n", - "for i, d in enumerate(website_chunks):\n", - " dd = d.copy()\n", - " dd.metadata['source'] = f\"w{i}-pl\"\n", - " vectorstore_with_sources.add_documents([dd])\n", - "\n", - "for i, d in enumerate(gh_docs_chunks):\n", - " dd = d.copy()\n", - " local_path = dd.metadata['source']\n", - " dd.metadata['source'] = f\"g{i}-pl\"\n", - " dd.metadata['url'] = f\"{local_path.replace(C4_GH_DOCS_STORAGE_DIR, 'https://github.com/code-423n4/docs/blob/main/')}\"\n", - " vectorstore_with_sources.add_documents([dd])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### MultiQuery approach" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.chat_models import ChatOpenAI\n", - "from langchain.retrievers.multi_query import 
MultiQueryRetriever\n", - "\n", - "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", - "multiquery_retriever = MultiQueryRetriever.from_llm(\n", - " retriever=vectorstore_with_sources.as_retriever(), llm=llm\n", - ")\n", - "import logging\n", - "\n", - "logging.basicConfig()\n", - "logging.getLogger(\"langchain.retrievers.multi_query\").setLevel(logging.ERROR)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.retrievers import BM25Retriever, EnsembleRetriever\n", - "\n", - "lowercased_website_chunks = []\n", - "for d in website_chunks:\n", - " dd = d.copy()\n", - " dd.page_content = d.page_content.lower()\n", - " lowercased_website_chunks.append(dd)\n", - "\n", - "\n", - "lowercased_gh_docs_chunks = []\n", - "for d in gh_docs_chunks:\n", - " dd = d.copy()\n", - " dd.page_content = d.page_content.lower()\n", - " lowercased_gh_docs_chunks.append(dd)\n", - "\n", - "bm25_retriever = BM25Retriever.from_documents(lowercased_website_chunks + lowercased_gh_docs_chunks)\n", - "bm25_retriever.k = 2\n", - "\n", - "ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, multiquery_retriever], weights=[0.5, 0.5])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Final Implementation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.chains import RetrievalQAWithSourcesChain\n", - "from langchain.chat_models import ChatOpenAI\n", - "\n", - "\n", - "model = ChatOpenAI(model_name=\"gpt-4\", temperature=0)\n", - "\n", - "qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(model, chain_type=\"stuff\", retriever=ensemble_retriever, return_source_documents=True)\n", - "\n", - "\n", - "def run_qa_with_sources(question):\n", - " \n", - " # Santize the question by removing any trailing question marks\n", - " sanitized_question = question.rstrip(\"?\")\n", - "\n", 
- " result = qa_with_sources({\"question\": sanitized_question}, return_only_outputs=True)\n", - "\n", - " answer = result['answer']\n", - " source_ids = result['sources']\n", - " source_docs = result['source_documents']\n", - "\n", - " source_urls = set()\n", - " for d in source_docs:\n", - " metadata = d.metadata\n", - " source_id = metadata['source']\n", - " url = metadata['url']\n", - " if source_id in source_ids:\n", - " source_urls.add(url)\n", - " return dict(answer=answer, source_urls=source_urls, source_docs=source_docs)\n", - "\n", - "def ask(question):\n", - " result = run_qa_with_sources(question)\n", - "\n", - " display(Markdown(f\"### Question\"))\n", - " display(Markdown(\"ORIGINAL: \" + question))\n", - "\n", - " display(Markdown(f\"### Answer\"))\n", - " display(Markdown(result[\"answer\"]))\n", - "\n", - " display(Markdown(f\"### Sources\"))\n", - " print(\", \".join(result['source_urls']))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def auto_eval():\n", - " bot_answers = []\n", - " source_docs = []\n", - " for d in yaml_data:\n", - " result = run_qa_with_sources(d['question'])\n", - " bot_answers.append(result['answer'])\n", - " source_docs.append(result['source_docs'])\n", - " \n", - " predictions = [{'result': a} for a in bot_answers]\n", - "\n", - " answer_grades = grade_model_answer(yaml_data, predictions)\n", - "\n", - " retrieved_docs = []\n", - " for i, d in enumerate(yaml_data):\n", - " retrieved_doc_text = \"\"\n", - " for j, doc in enumerate(source_docs[i]):\n", - " retrieved_doc_text += \"Doc %s: \" % str(j + 1) + doc.page_content + \" \"\n", - " retrieved = {\"question\": d[\"question\"], \"answer\": d[\"answer\"], \"result\": retrieved_doc_text}\n", - " retrieved_docs.append(retrieved)\n", - "\n", - " retrieval_grades = grade_model_retrieval(yaml_data, retrieved_docs)\n", - "\n", - " df = pd.DataFrame({\n", - " \"question\": [d['question'] for d in 
yaml_data],\n", - " \"Mava correct answer (True value)\": [d['answer'] for d in yaml_data],\n", - " \"Bot answers\": [p['result'] for p in predictions],\n", - " \"Retrieval relevancy score\": ['Incorrect' if 'Incorrect' in g['results'] else 'Correct' for g in retrieval_grades],\n", - " \"Answer similarity score\": ['Incorrect' if 'Incorrect' in g['results'] else 'Correct' for g in answer_grades]\n", - " })\n", - " print(f\"Bot Accuracy: {df['Answer similarity score'].value_counts()['Correct'] / len(df['Answer similarity score'])}\")\n", - " \n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "auto_eval()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Questions that were answered incorrectly by the Mava bot as per emoji reaction in the test channel\n", - "MAVA_MISANSWERED_QUES = [\n", - " \"Am I allowed to use AI in an audit?\",\n", - " \"Can I change my Code4rena username?\",\n", - " \"How do I book a solo audit?\",\n", - " \"Do I need to be certified to participate in an audit?\",\n", - " \"How do bot races work?\",\n", - " \"Can I change my Code4rena profile name?\",\n", - " \"What are scout awards?\",\n", - " \"What are analysis reports?\",\n", - " \"what is an analysis finding?\",\n", - " \"My name wasn't in the award announcements. 
When can I check on my results?\",\n", - " \"How long does the certification process take?\",\n", - " \"How can I access findings.csv?\",\n", - " \"Can I use chatgpt?\"\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "labeled_ques = [d['question'] for d in yaml_data]\n", - "eval_set = labeled_ques + MAVA_MISANSWERED_QUES" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "eval_results = []\n", - "for q in eval_set:\n", - " result = run_qa_with_sources(q)\n", - " eval_results.append(result)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.DataFrame({\n", - " \"question\": [q for q in eval_set],\n", - " \"Bot answers\": [r['answer'] for r in eval_results],\n", - " \"Sources\": [ \", \".join(r['source_urls']) for r in eval_results],\n", - "})\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df.to_excel(\"./outputs/eval_results.xlsx\", index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ask(\"My wallet was hacked. What do I do?\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "c4-chatbot", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.7" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -}