diff --git a/llm_abbreviation_enrichment/llm_abbreviations_iintro.md b/llm_abbreviation_enrichment/llm_abbreviations_iintro.md new file mode 100644 index 00000000..dad4940a --- /dev/null +++ b/llm_abbreviation_enrichment/llm_abbreviations_iintro.md @@ -0,0 +1,145 @@ +# Making LLM work with abbreviations +
+
+
+import os
+import openai
+import json
+from dotenv import load_dotenv
+# Pull API credentials/endpoints from a local .env file into the environment.
+load_dotenv()
+
+# Default Azure OpenAI connection for the function-calling deployment.
+openai.api_type = os.getenv("API_TYPE")
+openai.api_version = os.getenv("API_VERSION_FUNCTION_CALL")
+openai.api_key = os.getenv("API_KEY")
+openai.api_base = os.getenv("API_BASE")
+# Deployment (engine) name used for ChatCompletion calls.
+ENGINE_API= os.getenv("ENGINE")
+# Generation defaults; NOTE(review): these are not referenced in the code
+# shown below — confirm they are used later in the file or remove them.
+max_tokens=2000
+temperature=0.0
+top_p=0
+frequency_penalty=0.0
+presence_penalty=0.0
+stop=None
+# Project-local corpus search helper that the selected function routes to.
+from aoai1_poc_github import _company_corpus_search
+
+
+def _aoai_company_function_token(prompt):
+
+ messages = [
+ {"role": "system", "content": """You're an AI assistant designed to help users search internal data corpus.
+ You must handle a variety of company name abbreviations.
+ """},
+ {"role": "system", "name":"example_user", "content": "What are the latest RD documents?"},
+ {"role": "system", "name": "example_assistant", "content": "arguments: {\n \"abbreviations\": \"Research and Development\"\n}"},
+ {"role": "system", "name":"example_user", "content": "What are the latest EGA documents?"},
+ {"role": "system", "name": "example_assistant", "content": "arguments: {\n \"abbreviations\": \"Executive General and Administration\"\n}"},
+ {"role": "user", "content": prompt}]
+ functions = [
+ {
+ "name": "_company_corpus_search",
+ "description": "Gets business's document information",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "document_type": {"type": "string", "description": "Type of document."},
+ "abbreviations": {
+ "type": "string",
+ "description": "Gets the abbrivation for the company entity return the full name from the followig list:SM=Sales and Marketing, IM=Inventory Management, M=Manufacturing, EGA=Executive General and Administration, AZ=Quality Assurance "},
+ },
+ "required": [],
+ }
+ },
+
+
+ ]
+ openai.api_type = os.getenv("API_TYPE")
+ openai.api_version = os.getenv("API_VERSION_FUNCTION_CALL")
+ openai.api_key = os.getenv("API_KEY")
+ openai.api_base = os.getenv("API_BASE")
+ response = openai.ChatCompletion.create(
+ engine=ENGINE_API,
+ messages=messages,
+ functions=functions,
+ function_call="auto", #auto is default, but we'll be explicit
+ )
+ response_message = response["choices"][0]["message"]
+ print(response_message)
+
+ #convert OpenAIObject to a JSON-formatted string
+ json_str = str(response_message)
+
+ #load JSON string into a dictionary
+ json_dict = json.loads(json_str)
+
+ #get the function name and arguments from the JSON dictionary
+ func_name = json_dict['function_call']['name']
+ func_args_str = json.loads(json_dict['function_call']['arguments'])
+ print(func_name)
+
+ # convert function arguments string to a dictionary
+ #func_args = json.loads(func_args_str)
+ API_KEY = os.getenv("NEWOPENAI_KEY")
+ RESOURCE_ENDPOINT = os.getenv("NEWRESOURCE_ENDPOINT")
+ openai.api_type = "azure"
+ openai.api_key = API_KEY
+ openai.api_base = RESOURCE_ENDPOINT
+ openai.api_version = "2023-03-15-preview"
+ #determine which function to call based on the function name
+
+ # Parse the arguments JSON string
+ document_type = func_args_str.get('document_type')
+ abbreviations = func_args_str.get('abbreviations')
+ print(abbreviations)
+
+ if func_name == '_company_corpus_search':
+ result = _company_corpus_search(prompt=prompt, document_type=document_type, abbreviations=abbreviations)
+ else:
+ result = 'There was an issue selecting the function'
+ return result
+
+
+
+
+
+
+_________________________________________________________________
+Using this method, we are able to break down user questions via classification tokens and then pass those adjectives, nouns, abbreviations, etc., to the respective function for processing and enrichment.
+This also provides a way to leverage multiple functions depending on the user's question. For example, we can call a function that stores data in Cognitive Search, Azure SQL, or even external APIs, and then have it all processed and summarized by the language model.
+_________________________________________________________________
+
+Finally, when we integrate the vector database — in this case Azure Cosmos DB — with the function, with no detailed instructions, we are able to see the full power of the vector engine below. This plays an integral role when organizations have a large corpus of data that cannot easily fit into the instructions and needs to be embedded.
+
+
+
+We would then pass the full abbreviation results to the language model, or rewrite the question before performing the full LLM request. This will essentially ground the model and provide appropriate context for the user's question.
+
+
+
+**For more information, or even to have this concept demoed live, please feel free to contact your local CSA (Cloud Solution Architect).**
+For the full instructions and code stack, please see the above repo folders.
+
+
+
+
+*Created by: Victor Adeyanju CSA*