diff --git a/.gitignore b/.gitignore
index 93ecc0ff..3ee85d33 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@ data/
 embedding_model/*
 !embedding_model/.ignore
 .DS_Store
+.conda
+.gitignore
diff --git a/FREAloadcontent.py b/FREAloadcontent.py
new file mode 100644
index 00000000..aefd3d15
--- /dev/null
+++ b/FREAloadcontent.py
@@ -0,0 +1,160 @@
+
+def readfile(file_info):
+    """Return the text of a 'text/plain' or 'application/pdf' file ('' on a PDF error); None for other types."""
+    pathname = file_info['path']
+    if file_info['type'] == 'text/plain':
+        with open(pathname, 'r') as file:
+            return file.read()
+    if file_info['type'] == 'application/pdf':
+        try:
+            from pdfreader import SimplePDFViewer  # local import: this module has no top-level imports
+            with open(pathname, "rb") as fd:  # context manager closes the handle even on failure
+                viewer = SimplePDFViewer(fd)
+                viewer.render()  # fills viewer.canvas for the current page; the text is not returned
+                return "".join(viewer.canvas.strings)
+        except Exception as e:
+            print(f"Error reading PDF file: {e}")
+        return ""
+
+def maketags(file_info):
+    """Return (and print) the folder names between the FREA share root and the file as a list of tags."""
+    substraction = 'C:/SyncedFolder/Team Shares/FREA/'
+    folder = file_info['path'].replace('\\', '/').removeprefix(substraction)  # accept either separator
+    folder = folder.removesuffix(file_info['name'])  # drop the file name only at the end of the path
+    tags = [part for part in folder.split('/') if part]  # no empty tag from the trailing '/'
+    print(tags)
+    return tags
+
+def process_file_getinfo(file_info):
+    """Collect the information extracted for one file.
+
+    Only the folder tags from maketags() are produced at the moment; the
+    file content (readfile) is not part of the result.
+    """
+    folder_tags = maketags(file_info)
+    return folder_tags
+
+
+def functext(file_info):
+    """Handler for 'text/plain'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcWebPages(file_info):
+    """Handler for 'text/html'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcMarkdown(file_info):
+    """Handler for 'text/markdown'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcXML(file_info):
+    """Handler for 'application/xml'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcPDF(file_info):
+    """Handler for 'application/pdf'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcDOC(file_info):
+    """Handler for 'application/msword'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcDOCX(file_info):
+    """Handler for the wordprocessingml (DOCX) MIME type; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcXLS(file_info):
+    """Handler for 'application/vnd.ms-excel' (XLS); returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcXLSX(file_info):
+    """Handler for the spreadsheetml (XLSX) MIME type; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcPPT(file_info):
+    """Handler for 'application/vnd.ms-powerpoint' (PPT); returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcPPTX(file_info):
+    """Handler for the presentationml (PPTX) MIME type; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcRTF(file_info):
+    """Handler for 'application/rtf'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcJPG(file_info):
+    """Handler for 'image/jpeg'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcPNG(file_info):
+    """Handler for 'image/png'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcGIF(file_info):
+    """Handler for 'image/gif'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcBMP(file_info):
+    """Handler for 'image/bmp'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcTIFF(file_info):
+    """Handler for 'image/tiff'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcJavaScript(file_info):
+    """Handler for 'application/javascript'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcZIP(file_info):
+    """Handler for 'application/zip'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcGZIP(file_info):
+    """Handler for 'application/gzip'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcMP3(file_info):
+    """Handler for 'audio/mpeg'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcMP4(file_info):
+    """Handler for 'video/mp4'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcWAV(file_info):
+    """Handler for 'audio/wav'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcOGG(file_info):
+    """Handler for 'audio/ogg'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcWEBM(file_info):
+    """Handler for 'video/webm'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcJSON(file_info):
+    """Handler for 'application/json'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcYAML(file_info):
+    """Handler for 'application/x-yaml'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcEPUB(file_info):
+    """Handler for 'application/epub+zip'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcMOBI(file_info):
+    """Handler for 'application/x-mobipocket-ebook'; returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+def funcnone(file_info):
+    """Fallback handler (no specific MIME type); returns the file's tags."""
+    return process_file_getinfo(file_info)
+
+    
+    
+    
\ No newline at end of file
diff --git a/FREAloader.Dockerfile b/FREAloader.Dockerfile
new file mode 100644
index 00000000..ebd46cc9
--- /dev/null
+++ b/FREAloader.Dockerfile
@@ -0,0 +1,24 @@
+FROM langchain/langchain
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    software-properties-common \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt .
+
+RUN pip install --upgrade -r requirements.txt
+
+# FREAloader.py imports FREAloadcontent, so both modules must be in the image
+COPY FREAloader.py FREAloadcontent.py ./
+COPY utils.py chains.py ./
+COPY images ./images
+
+EXPOSE 8506
+
+# Probe the port Streamlit actually listens on (8506, see ENTRYPOINT)
+HEALTHCHECK CMD curl --fail http://localhost:8506/_stcore/health
+ENTRYPOINT ["streamlit", "run", "FREAloader.py", "--server.port=8506", "--server.address=0.0.0.0"]
diff --git a/FREAloader.py b/FREAloader.py
new file mode 100644
index 00000000..967c0f2c
--- /dev/null
+++ b/FREAloader.py
@@ -0,0 +1,226 @@
+import os
+import requests
+import mimetypes
+from dotenv import load_dotenv
+from langchain_community.graphs import Neo4jGraph
+import streamlit as st
+from streamlit.logger import get_logger
+from chains import load_embedding_model
+from utils import create_constraints, create_vector_index
+from PIL import Image
+import FREAloadcontent as FC
+from pdfreader import SimplePDFViewer
+
+
+load_dotenv(".env")
+# Connection and model settings are read from the environment populated above.
+url = os.getenv("NEO4J_URI")
+username = os.getenv("NEO4J_USERNAME")
+password = os.getenv("NEO4J_PASSWORD")
+ollama_base_url = os.getenv("OLLAMA_BASE_URL")
+embedding_model_name = os.getenv("EMBEDDING_MODEL")
+# Remapping for Langchain Neo4j integration
+os.environ["NEO4J_URL"] = url  # NOTE(review): raises TypeError when NEO4J_URI is unset (url is None)
+
+logger = get_logger(__name__)
+
+# NOTE(review): the two lines below are disabled code; so_api_base_url is still
+# referenced by load_high_score_so_data() further down in this module.
+#so_api_base_url = "https://api.stackexchange.com/2.3/search/advanced"
+#next(results)
+embeddings, dimension = load_embedding_model(
+    embedding_model_name, config={"ollama_base_url": ollama_base_url}, logger=logger
+)
+
+# if Neo4j is local, you can go to http://localhost:7474/ to browse the database
+neo4j_graph = Neo4jGraph(url=url, username=username, password=password)
+
+create_constraints(neo4j_graph)  # runs at import time, before any page is rendered
+create_vector_index(neo4j_graph, dimension)  # index sized by the embedding dimension returned above
+
+def read_files_info(directory='.'):
+    """Lazily yield one metadata dict per file found under *directory* (recursive).
+
+    Keys: 'path', 'name', 'type' (mimetypes guess from the name, may be None),
+    'size' in bytes, 'creation_time' (st_ctime) and 'modification_time' (st_mtime).
+    """
+    for root, _dirs, files in os.walk(directory):
+        for filename in files:
+            file_path = os.path.join(root, filename)
+            info = os.stat(file_path)  # one stat call supplies the size and both timestamps
+            yield {
+                'path': file_path, 'name': filename,
+                'type': mimetypes.guess_type(file_path)[0], 'size': info.st_size,
+                'creation_time': info.st_ctime, 'modification_time': info.st_mtime,
+            }
+
+def get_file_info():
+    """Dispatch the next file yielded by the module-level `results` generator to its MIME handler."""
+    file_info = next(results, None)  # None once the directory walk is exhausted
+    if file_info is None:
+        return  # nothing left to process; avoids an uncaught StopIteration
+    switch_case(file_info['type'], file_info)
+
+results = read_files_info('C:/SyncedFolder/Team Shares/FREA/')  # lazy generator; get_file_info() pulls one entry per call
+
+def switch_case(value,file_info):
+    """Call the FC handler registered for MIME type *value* with *file_info*; print a notice when none matches."""
+    switch = {
+        'text/plain': FC.functext, 'text/html': FC.funcWebPages,
+        'text/markdown': FC.funcMarkdown,
+        'application/xml':  FC.funcXML,
+        'application/pdf':  FC.funcPDF,
+        'application/msword':  FC.funcDOC,
+        'application/vnd.openxmlformats-officedocument.wordprocessingml.document':  FC.funcDOCX,
+        'application/vnd.ms-excel':  FC.funcXLS,  # key is the bare MIME type, without a ' (XLS)' suffix
+        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':  FC.funcXLSX,
+        'application/vnd.ms-powerpoint':  FC.funcPPT,  # likewise without ' (PPT)'
+        'application/vnd.openxmlformats-officedocument.presentationml.presentation':  FC.funcPPTX,
+        'application/rtf':  FC.funcRTF,
+        'image/jpeg':  FC.funcJPG,
+        'image/png':  FC.funcPNG,
+        'image/gif':  FC.funcGIF,
+        'image/bmp':  FC.funcBMP,
+        'image/tiff':  FC.funcTIFF,
+        'application/javascript':  FC.funcJavaScript,
+        'application/zip':  FC.funcZIP,
+        'application/gzip': FC.funcGZIP,
+        'audio/mpeg':  FC.funcMP3,
+        'video/mp4':  FC.funcMP4,
+        'audio/wav':  FC.funcWAV,
+        'audio/ogg':  FC.funcOGG,
+        'video/webm':  FC.funcWEBM,
+        'application/json':  FC.funcJSON,
+        'application/x-yaml':  FC.funcYAML,
+        'application/epub+zip':  FC.funcEPUB,
+        'application/x-mobipocket-ebook':  FC.funcMOBI,
+        None: FC.funcnone,  # mimetypes.guess_type() yields the None object, not the string 'None'
+    }
+    func = switch.get(value)
+    if func:
+        return func(file_info)
+    print(f"No function found for file type {value}")
+
+
+def insert_so_data(data=None):
+    """Placeholder import step: prints the numbers 1 to 20 and ignores *data*."""
+    # *data* is optional so both insert_so_data() and insert_so_data(data) callers work.
+    for i in range(1, 21):
+        print(i)
+
+def load_so_data(tag: str = "neo4j", page: int = 1) -> None:
+    """Placeholder: accepts *tag* and *page* but performs no request or import."""
+    parameters = ()  # empty for now; nothing reads it
+    # Disabled request/import flow, kept for reference:
+    #data = requests.get(so_api_base_url + parameters).json()
+    #insert_so_data():
+
+
+def load_high_score_so_data() -> None:  # fetch JSON from the API URL and hand it to insert_so_data()
+    parameters = (
+        # empty tuple: no query parameters are defined here
+    )
+    data = requests.get(so_api_base_url + parameters).json()  # NOTE(review): so_api_base_url has no live assignment visible in this module - confirm
+    insert_so_data(data)  # NOTE(review): confirm insert_so_data() accepts a positional argument
+
+
+
+
+
+
+'''
+def insert_so_data(data: dict) -> None:
+    # Calculate embedding values for questions and answers
+    for q in data["items"]:
+        question_text = q["title"] + "\n" + q["body_markdown"]
+        q["embedding"] = embeddings.embed_query(question_text)
+        for a in q["answers"]:
+            a["embedding"] = embeddings.embed_query(
+                question_text + "\n" + a["body_markdown"]
+            )
+
+    # Cypher, the query language of Neo4j, is used to import the data
+    # https://neo4j.com/docs/getting-started/cypher-intro/
+    # https://neo4j.com/docs/cypher-cheat-sheet/5/auradb-enterprise/
+    import_query = """
+    UNWIND $data AS q
+    MERGE (question:Question {id:q.question_id}) 
+    ON CREATE SET question.title = q.title, question.link = q.link, question.score = q.score,
+        question.favorite_count = q.favorite_count, question.creation_date = datetime({epochSeconds: q.creation_date}),
+        question.body = q.body_markdown, question.embedding = q.embedding
+    FOREACH (tagName IN q.tags | 
+        MERGE (tag:Tag {name:tagName}) 
+        MERGE (question)-[:TAGGED]->(tag)
+    )
+    FOREACH (a IN q.answers |
+        MERGE (question)<-[:ANSWERS]-(answer:Answer {id:a.answer_id})
+        SET answer.is_accepted = a.is_accepted,
+            answer.score = a.score,
+            answer.creation_date = datetime({epochSeconds:a.creation_date}),
+            answer.body = a.body_markdown,
+            answer.embedding = a.embedding
+        MERGE (answerer:User {id:coalesce(a.owner.user_id, "deleted")}) 
+        ON CREATE SET answerer.display_name = a.owner.display_name,
+                      answerer.reputation= a.owner.reputation
+        MERGE (answer)<-[:PROVIDED]-(answerer)
+    )
+    WITH * WHERE NOT q.owner.user_id IS NULL
+    MERGE (owner:User {id:q.owner.user_id})
+    ON CREATE SET owner.display_name = q.owner.display_name,
+                  owner.reputation = q.owner.reputation
+    MERGE (owner)-[:ASKED]->(question)
+    """
+    neo4j_graph.query(import_query, {"data": data["items"]})
+'''
+
+# Streamlit
+def get_tag() -> str:
+    """Render the tag text box (default "test automation") and return its current value."""
+    return st.text_input(
+        "Which tag questions do you want to import?", value="test automation"
+    )
+
+
+def get_pages():
+    """Show the page-count and start-page inputs in two columns and
+    return (num_pages, start_page), both converted to int."""
+    left, right = st.columns(2)
+    with left:
+        page_count = st.number_input("Number of pages (100 questions per page)", step=1, min_value=1)
+    with right:
+        first_page = st.number_input("Start page", step=1, min_value=1)
+    st.caption("Only questions with answers will be imported.")
+    return (int(page_count), int(first_page))
+
+
+def render_page():
+    """Draw the loader page: header, tag/page inputs and the two import buttons with their status messages."""
+    datamodel_image = Image.open("./images/datamodel.png")
+    st.header("StackOverflow Loader")
+    st.subheader("Choose StackOverflow tags to load into Neo4j")
+    st.caption("Go to http://localhost:7474/ to explore the graph.")
+    user_input = get_tag()  # the Import branch below reads these three names
+    num_pages, start_page = get_pages()
+
+    if st.button("Import", type="primary"):
+        with st.spinner("Loading... This might take a minute or two."):
+            try:
+                for page in range(1, num_pages + 1):
+                    load_so_data(user_input, start_page + (page - 1))
+                st.success("Import successful", icon="✅")
+                st.caption("Data model")
+                st.image(datamodel_image)
+                st.caption("Go to http://localhost:7474/ to interact with the database")
+            except Exception as e:
+                st.error(f"Error: {e}", icon="🚨")
+    with st.expander("Highly ranked questions rather than tags?"):
+        if st.button("Import highly ranked questions"):
+            with st.spinner("Loading... This might take a minute or two."):
+                try:
+                    load_high_score_so_data()
+                    st.success("Import successful", icon="✅")
+                except Exception as e:
+                    st.error(f"Error: {e}", icon="🚨")
+
+
+render_page()
diff --git a/__pycache__/chains.cpython-310.pyc b/__pycache__/chains.cpython-310.pyc
new file mode 100644
index 00000000..1a37e95b
Binary files /dev/null and b/__pycache__/chains.cpython-310.pyc differ
diff --git a/__pycache__/utils.cpython-310.pyc b/__pycache__/utils.cpython-310.pyc
new file mode 100644
index 00000000..865c0a7a
Binary files /dev/null and b/__pycache__/utils.cpython-310.pyc differ
diff --git a/docker-compose.yml b/docker-compose.yml
index 3a1bbc08..7dc8f029 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -36,7 +36,7 @@ services:
       - 7687:7687
       - 7474:7474
     volumes:
-      - $PWD/data:/data
+      - "D:/data:/data"
     environment:
       - NEO4J_AUTH=${NEO4J_USERNAME-neo4j}/${NEO4J_PASSWORD-password}
       - NEO4J_PLUGINS=["apoc"]
@@ -91,6 +91,47 @@ services:
       - 8081:8080
       - 8502:8502
 
+  frealoader:
+    build:
+      context: .
+      dockerfile: FREAloader.Dockerfile
+    volumes:
+      - $PWD/embedding_model:/embedding_model
+    environment:
+      - NEO4J_URI=${NEO4J_URI-neo4j://database:7687}
+      - NEO4J_PASSWORD=${NEO4J_PASSWORD-password}
+      - NEO4J_USERNAME=${NEO4J_USERNAME-neo4j}
+      - OPENAI_API_KEY=${OPENAI_API_KEY-}
+      - GOOGLE_API_KEY=${GOOGLE_API_KEY-}      
+      - OLLAMA_BASE_URL=${OLLAMA_BASE_URL-http://host.docker.internal:11434}
+      - EMBEDDING_MODEL=${EMBEDDING_MODEL-sentence_transformer}
+      - LANGCHAIN_ENDPOINT=${LANGCHAIN_ENDPOINT-"https://api.smith.langchain.com"}
+      - LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2-false}
+      - LANGCHAIN_PROJECT=${LANGCHAIN_PROJECT}
+      - LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY}
+      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
+      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
+      - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}
+    networks:
+      - net
+    depends_on:
+      database:
+        condition: service_healthy
+      pull-model:
+        condition: service_completed_successfully
+    x-develop:
+      watch:
+        - action: rebuild
+          path: .
+          ignore:
+            - bot.py
+            - pdf_bot.py
+            - api.py
+            - front-end/
+    ports:
+      - 8082:8080
+      - 8506:8506
+
 
   bot:
     build:
diff --git a/functions/YoLo/weights/person_yolov8m-seg.pt b/functions/YoLo/weights/person_yolov8m-seg.pt
new file mode 100644
index 00000000..d17330b5
Binary files /dev/null and b/functions/YoLo/weights/person_yolov8m-seg.pt differ
diff --git a/functions/YoLo/yolo_seg b/functions/YoLo/yolo_seg
new file mode 100644
index 00000000..1f30a3b5
--- /dev/null
+++ b/functions/YoLo/yolo_seg
@@ -0,0 +1,80 @@
+import torch
+from ultralytics import YOLO
+import cv2
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+
+
+def segment_image(image_path, model_weights, output_path=None):
+    """Segment *image_path* with a YOLOv8-seg model and return a 0/255 uint8 mask.
+
+    The mask of the first detected instance is resized to the input image size;
+    an all-zero mask is returned when nothing is detected. If *output_path* is
+    given the mask is also written there. Raises FileNotFoundError when the
+    image cannot be read.
+    """
+    model = YOLO(model_weights)
+    image = cv2.imread(image_path)  # BGR array; None (no exception) when unreadable
+    if image is None:
+        raise FileNotFoundError(f"Could not read image: {image_path}")
+    height, width = image.shape[:2]
+    # Ultralytics expects BGR for numpy input, so pass the cv2 image as loaded.
+    masks = model(image)[0].masks  # None when no instance was detected
+    if masks is None:
+        segmented_image = np.zeros((height, width), dtype=np.uint8)
+    else:
+        mask = (masks.data[0].cpu().numpy() > 0.5).astype(np.uint8) * 255  # .cpu(): also works on GPU
+        # Masks come back at inference resolution; scale to the source size.
+        segmented_image = cv2.resize(mask, (width, height), interpolation=cv2.INTER_NEAREST)
+    if output_path:
+        cv2.imwrite(output_path, segmented_image)
+    return segmented_image
+
+
+def image_to_numpy_opencv(image_path):
+    """Load *image_path* with OpenCV and return whatever cv2.imread produces."""
+    # cv2.imread yields None rather than raising when the file is unreadable.
+    return cv2.imread(image_path)
+
+def load_mask(mask_path):
+    """Read *mask_path* as grayscale and binarise it: pixels above 127 become 1, the rest 0."""
+    gray = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
+    binary = cv2.threshold(gray, 127, 1, cv2.THRESH_BINARY)[1]  # [1] is the thresholded image
+    return binary
+
+
+def extract_masked_pixels(image, mask):
+    """Return a copy of *image* that is zero everywhere except where *mask* equals 1.
+
+    *mask* is cast to uint8 before the comparison; it is used as a boolean
+    index on *image*, so its shape has to be compatible with the image's.
+    """
+    selected = mask.astype(np.uint8) == 1  # boolean selector, computed once
+    # Start from an all-zero canvas with the image's shape and dtype,
+    # then copy across only the selected pixels.
+    output = np.zeros_like(image)
+    output[selected] = image[selected]
+    return output
+
+
+
+
+# Example usage
+original_image = './images/me.jpg'
+model_weights = './functions/YoLo/weights/person_yolov8m-seg.pt'  # Path to open-source YOLOv8 weights
+binary_mask = './images/output_segmented.jpg'
+
+segmented_image = segment_image(original_image, model_weights, binary_mask)
+
+original_imageNP = image_to_numpy_opencv(original_image)
+binary_mask = load_mask(binary_mask)  # Ensure the mask is loaded correctly
+# Mask the pixel array, not the path string held in original_image.
+result = extract_masked_pixels(original_imageNP, binary_mask)
+cv2.imwrite('masked_image.png', result)
+
+
+
+
+
diff --git a/functions/functions.py b/functions/functions.py
new file mode 100644
index 00000000..c3312f35
--- /dev/null
+++ b/functions/functions.py
@@ -0,0 +1,40 @@
+import os
+
+from dotenv import load_dotenv
+from neo4j import GraphDatabase
+
+load_dotenv()  # populate os.environ from a .env file before the lookups below
+url = os.getenv("NEO4J_URI")
+username = os.getenv("NEO4J_USERNAME")
+password = os.getenv("NEO4J_PASSWORD")
+ollama_base_url = os.getenv("OLLAMA_BASE_URL")
+embedding_model_name = os.getenv("EMBEDDING_MODEL")
+llm_name = os.getenv("LLM")
+# Remapping for Langchain Neo4j integration
+if url is not None:  # os.environ only accepts str values
+    os.environ["NEO4J_URL"] = url
+
+# Connect to the Neo4j database with the settings read from the environment
+driver = GraphDatabase.driver(url, auth=(username, password))
+
+cypher_query = """
+MATCH (n)
+RETURN n
+"""
+
+def run_cypher_query(query):
+    """Run *query* in a new session on the module-level driver and return all records as a list."""
+    with driver.session() as session:
+        return list(session.run(query))
+
+
+# Run the Cypher query
+#results = run_cypher_query(cypher_query)
+
+# Process and print the results
+#for record in results:
+    ###print(record)
+
+# Close the Neo4j driver
+#driver.close()
+#
\ No newline at end of file
diff --git a/images/me.jpg b/images/me.jpg
new file mode 100644
index 00000000..6f8dfbb8
Binary files /dev/null and b/images/me.jpg differ
diff --git a/images/output_segmented.jpg b/images/output_segmented.jpg
new file mode 100644
index 00000000..bd350143
Binary files /dev/null and b/images/output_segmented.jpg differ
diff --git a/mimetype/frea_load_DOC_neo.py b/mimetype/frea_load_DOC_neo.py
new file mode 100644
index 00000000..e69de29b
diff --git a/mimetype/frea_load_image_neo.py b/mimetype/frea_load_image_neo.py
new file mode 100644
index 00000000..e69de29b
diff --git a/mimetype/frea_load_pdf_neo.py b/mimetype/frea_load_pdf_neo.py
new file mode 100644
index 00000000..e69de29b
diff --git a/mimetype/frea_load_sheet_neo.py b/mimetype/frea_load_sheet_neo.py
new file mode 100644
index 00000000..e69de29b
diff --git a/mimetype/frea_mimetype_switch_neo.py b/mimetype/frea_mimetype_switch_neo.py
new file mode 100644
index 00000000..a87e1cc6
--- /dev/null
+++ b/mimetype/frea_mimetype_switch_neo.py
@@ -0,0 +1,37 @@
+
+##Plain Text: text/plain
+#HTML: text/html
+#Markdown: text/markdown
+#XML: application/xml
+#Documents
+#PDF: application/pdf
+#Microsoft Word: application/msword (DOC), application/vnd.openxmlformats-officedocument.wordprocessingml.document (DOCX)
+#Microsoft Excel: application/vnd.ms-excel (XLS), application/vnd.openxmlformats-officedocument.spreadsheetml.sheet (XLSX)
+#Microsoft PowerPoint: application/vnd.ms-powerpoint (PPT), application/vnd.openxmlformats-officedocument.presentationml.presentation (PPTX)
+#Rich Text Format: application/rtf
+#Images
+#JPEG: image/jpeg
+#PNG: image/png
+#GIF: image/gif
+#BMP: image/bmp
+#TIFF: image/tiff
+#Web Content
+#Web Pages: text/html, which includes embedded images, scripts, etc.
+#CSS: text/css
+#JavaScript: application/javascript
+#Archives and Compressed Files
+#ZIP: application/zip
+#GZIP: application/gzip
+#Multimedia
+#MP3: audio/mpeg
+#MP4: video/mp4
+#WAV: audio/wav
+#OGG: audio/ogg
+#WebM: video/webm
+#Others
+#JSON: application/json
+#YAML: application/x-yaml
+#CSV: text/csv
+#Specialized Formats
+#EPUB: application/epub+zip
+#MOBI: application/x-mobipocket-ebook
\ No newline at end of file
diff --git a/multisearch.py b/multisearch.py
new file mode 100644
index 00000000..e9221d0b
--- /dev/null
+++ b/multisearch.py
@@ -0,0 +1,141 @@
+import os
+import json
+import cv2
+import requests
+import torch
+import networkx as nx
+import matplotlib.pyplot as plt
+from neo4j import GraphDatabase
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+from fastapi import FastAPI, UploadFile, File
+from langchain.llms import OpenAI
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Neo4j / LLM configuration: the environment wins, the old literals remain as defaults
+NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
+NEO4J_USER = os.getenv("NEO4J_USERNAME", "neo4j")
+NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "password")
+STACKAI_LLM_API = os.getenv("STACKAI_LLM_API", "http://localhost:8000/v1/completions")
+
+# Initialize FastAPI
+app = FastAPI()
+
+# Load OCR Model (TrOCR)
+processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
+model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
+
+# Connect to Neo4j
+driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
+
+def preprocess_image(image_path):
+    """Load *image_path* as grayscale and return its inverted, Otsu-thresholded version."""
+    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
+    _, binarised = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+    return binarised
+
+def extract_text(image_path):
+    """Extract text from handwritten notes with TrOCR and return the decoded string."""
+    image = preprocess_image(image_path)
+    image = cv2.resize(image, (1024, 1024))
+    # TrOCR's image processor needs a 3-channel image; the preprocessed one is single-channel.
+    image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
+    pixel_values = processor(image, return_tensors="pt").pixel_values
+    generated_ids = model.generate(pixel_values)
+    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+def extract_math(image_path):
+    """Extract mathematical formulas: POST the image and return the "text" field of the JSON reply ('' if absent)."""
+    with open(image_path, "rb") as image_file:  # close the handle once the upload is done
+        response = requests.post("https://huggingface.co/breezedeus/pix2text-mfr", files={"file": image_file}, timeout=60)
+    return response.json().get("text", "")  # NOTE(review): confirm this URL is a JSON API endpoint
+
+def extract_chemistry(image_path):
+    """Extract chemical formulas: POST the image and return the "chemical_formula" field of the JSON reply ('' if absent)."""
+    with open(image_path, "rb") as image_file:  # close the handle once the upload is done
+        response = requests.post("https://chemocr.ai/api/v1/predict", files={"file": image_file}, timeout=60)
+    return response.json().get("chemical_formula", "")
+
+def summarize_content(text):
+    """Summarize scientific notes using LLM; returns the chain's output for *text*."""
+    llm = OpenAI(base_url=STACKAI_LLM_API, api_key="your-api-key")
+    template = PromptTemplate(
+        input_variables=["text"],
+        template="Summarize this scientific note and extract key concepts: {text}",
+    )
+    return LLMChain(llm=llm, prompt=template).run(text)
+
+def store_in_neo4j(title, text, math, chem, summary):
+    """Create a Note node, then one MathFormula / Chemical node per non-empty
+    line of *math* / *chem*, linked by APPEARS_IN to the Note(s) with that title.
+    """
+    with driver.session() as session:
+        session.run("""
+            CREATE (n:Note {title: $title, text: $text, summary: $summary})
+        """, title=title, text=text, summary=summary)
+
+        for formula in filter(None, math.split("\n")):  # empty lines are skipped
+            session.run("""
+                    MATCH (n:Note {title: $title})
+                    CREATE (m:MathFormula {formula: $formula})-[:APPEARS_IN]->(n)
+                """, title=title, formula=formula)
+
+        for compound in filter(None, chem.split("\n")):  # empty lines are skipped
+            session.run("""
+                    MATCH (n:Note {title: $title})
+                    CREATE (c:Chemical {compound: $compound})-[:APPEARS_IN]->(n)
+                """, title=title, compound=compound)
+
+@app.post("/process/")
+async def process_image(file: UploadFile = File(...)):
+    """Process uploaded image and store in Neo4j; returns the summary and the extracted formulas."""
+    # The client controls file.filename: keep only its last component so it cannot escape ./temp.
+    safe_name = os.path.basename((file.filename or "").replace("\\", "/")) or "upload"
+    os.makedirs("./temp", exist_ok=True)  # only the __main__ branch creates it otherwise
+    file_path = os.path.join("./temp", safe_name)
+    with open(file_path, "wb") as f:
+        f.write(await file.read())
+
+    extracted_text = extract_text(file_path)
+    extracted_math = extract_math(file_path)
+    extracted_chem = extract_chemistry(file_path)
+    full_content = f"{extracted_text}\nMathematical Formulas: {extracted_math}\nChemical Formulas: {extracted_chem}"
+    summary = summarize_content(full_content)
+    store_in_neo4j(safe_name, extracted_text, extracted_math, extracted_chem, summary)
+    return {"summary": summary, "math": extracted_math, "chem": extracted_chem}
+
+def _node_label(node):
+    """Return the node's title, formula or compound (first one present), else None."""
+    for key in ("title", "formula", "compound"):
+        if key in node:
+            return node[key]
+    return None
+
+
+@app.get("/knowledge-graph/")
+def get_knowledge_graph():
+    """Retrieve knowledge graph data from Neo4j and visualize it."""
+    query = """
+    MATCH (n)-[r]->(m) RETURN n, r, m
+    """
+    with driver.session() as session:
+        pairs = [(_node_label(rec["n"]), _node_label(rec["m"])) for rec in session.run(query)]
+    # Skip edges touching nodes that carry none of the known label properties.
+    relationships = [pair for pair in pairs if None not in pair]
+    G = nx.Graph()
+    G.add_edges_from(relationships)
+    plt.figure(figsize=(10, 6))
+    nx.draw(G, with_labels=True, node_color="lightblue", edge_color="gray", node_size=2000, font_size=10)
+    plt.title("Knowledge Graph")
+    plt.savefig("./temp/knowledge_graph.png")
+    plt.close()  # release the figure; pyplot otherwise keeps one alive per request
+    return {"message": "Knowledge graph updated. View the generated graph at /temp/knowledge_graph.png"}
+
+if __name__ == "__main__":  # only when executed as a script
+    import uvicorn
+    os.makedirs("./temp", exist_ok=True)  # directory used for uploads and the rendered graph image
+    uvicorn.run(app, host="0.0.0.0", port=8080)  # serve the FastAPI app on all interfaces, port 8080
\ No newline at end of file
diff --git a/process_documents.py b/process_documents.py
new file mode 100644
index 00000000..f74948f7
--- /dev/null
+++ b/process_documents.py
@@ -0,0 +1,156 @@
+import os
+import cv2
+import json
+import requests
+import torch
+import networkx as nx
+import matplotlib.pyplot as plt
+from pdfminer.high_level import extract_text as extract_pdf_text
+from docx import Document
+from neo4j import GraphDatabase
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+from langchain.llms import OpenAI
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Neo4j / LLM configuration: the environment wins, the old literals remain as defaults
+NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
+NEO4J_USER = os.getenv("NEO4J_USERNAME", "neo4j")
+NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "password")
+STACKAI_LLM_API = os.getenv("STACKAI_LLM_API", "http://localhost:8000/v1/completions")
+
+# Initialize Neo4j connection
+driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
+
+# Load TrOCR for OCR
+processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
+model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
+
+def preprocess_image(image_path):
+    """Load *image_path* as grayscale and return its inverted, Otsu-thresholded version."""
+    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
+    _, binarised = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+    return binarised
+
+def extract_text_from_image(image_path):
+    """Extract handwritten text from image using TrOCR."""
+    # TrOCR's image processor needs 3 channels; preprocess_image() returns a single-channel array.
+    image = cv2.cvtColor(preprocess_image(image_path), cv2.COLOR_GRAY2RGB)
+    pixel_values = processor(image, return_tensors="pt").pixel_values
+    return processor.batch_decode(model.generate(pixel_values), skip_special_tokens=True)[0]
+
+def extract_text_from_pdf(pdf_path):
+    """Return the text that pdfminer's high-level extract_text() produces for *pdf_path*."""
+    return extract_pdf_text(pdf_path)
+
+def extract_text_from_docx(docx_path):
+    """Return the text of every paragraph in the DOCX, joined with newlines (python-docx)."""
+    paragraphs = Document(docx_path).paragraphs
+    return "\n".join(paragraph.text for paragraph in paragraphs)
+
+def extract_text_from_txt(txt_path):
+    """Return the whole content of *txt_path*, decoded as UTF-8."""
+    with open(txt_path, encoding="utf-8") as handle:  # mode defaults to "r"
+        return handle.read()
+
+def extract_math(image_path):
+    """Extract mathematical formulas: POST the image and return the "text" field of the JSON reply ('' if absent)."""
+    with open(image_path, "rb") as image_file:  # close the handle once the upload is done
+        response = requests.post("https://huggingface.co/breezedeus/pix2text-mfr", files={"file": image_file}, timeout=60)
+    return response.json().get("text", "")  # NOTE(review): confirm this URL is a JSON API endpoint
+
+def extract_chemistry(image_path):
+    """Extract chemical formulas: POST the image and return the "chemical_formula" field of the JSON reply ('' if absent)."""
+    with open(image_path, "rb") as image_file:  # close the handle once the upload is done
+        response = requests.post("https://chemocr.ai/api/v1/predict", files={"file": image_file}, timeout=60)
+    return response.json().get("chemical_formula", "")
+
+def summarize_content(text):
+    """Summarize extracted content using LLM; returns the chain's output for *text*."""
+    llm = OpenAI(base_url=STACKAI_LLM_API, api_key="your-api-key")
+    template = PromptTemplate(
+        input_variables=["text"],
+        template="Summarize this scientific note and extract key concepts: {text}",
+    )
+    return LLMChain(llm=llm, prompt=template).run(text)
+
+def store_in_neo4j(filename, text, math, chem, summary):
+    """Create a Document node, then one MathFormula / Chemical node per non-empty
+    line of *math* / *chem*, linked by APPEARS_IN to the Document(s) with that filename.
+    """
+    with driver.session() as session:
+        session.run("""
+            CREATE (n:Document {filename: $filename, text: $text, summary: $summary})
+        """, filename=filename, text=text, summary=summary)
+
+        for formula in filter(None, math.split("\n")):  # empty lines are skipped
+            session.run("""
+                    MATCH (n:Document {filename: $filename})
+                    CREATE (m:MathFormula {formula: $formula})-[:APPEARS_IN]->(n)
+                """, filename=filename, formula=formula)
+
+        for compound in filter(None, chem.split("\n")):  # empty lines are skipped
+            session.run("""
+                    MATCH (n:Document {filename: $filename})
+                    CREATE (c:Chemical {compound: $compound})-[:APPEARS_IN]->(n)
+                """, filename=filename, compound=compound)
+
+def process_files(directory):
+    """Recursively process files in a directory and store them in Neo4j.
+
+    Images (.jpg/.jpeg/.png), .pdf, .docx and .txt files are handled; every
+    other file is skipped instead of being summarised and stored as empty text.
+    """
+    for root, _, files in os.walk(directory):
+        for file in files:
+            file_path = os.path.join(root, file)
+            name = file.lower()
+            if not name.endswith((".jpg", ".jpeg", ".png", ".pdf", ".docx", ".txt")):
+                continue  # unsupported type: nothing to extract, so no LLM call and no node
+            print(f"Processing: {file_path}")
+            extracted_math = extracted_chem = ""
+            if name.endswith((".jpg", ".jpeg", ".png")):
+                extracted_text = extract_text_from_image(file_path)
+                extracted_math = extract_math(file_path)
+                extracted_chem = extract_chemistry(file_path)
+            elif name.endswith(".pdf"):
+                extracted_text = extract_text_from_pdf(file_path)
+            elif name.endswith(".docx"):
+                extracted_text = extract_text_from_docx(file_path)
+            else:  # only .txt is left after the guard above
+                extracted_text = extract_text_from_txt(file_path)
+            full_content = f"{extracted_text}\nMath: {extracted_math}\nChem: {extracted_chem}"
+            store_in_neo4j(file, extracted_text, extracted_math, extracted_chem, summarize_content(full_content))
+
+def _node_label(node):
+    """Return the node's filename, formula or compound (first one present), else None."""
+    for key in ("filename", "formula", "compound"):
+        if key in node:
+            return node[key]
+    return None
+
+
+def generate_knowledge_graph():
+    """Generate a visualization of the knowledge graph."""
+    query = "MATCH (n)-[r]->(m) RETURN n, r, m"
+    with driver.session() as session:
+        pairs = [(_node_label(rec["n"]), _node_label(rec["m"])) for rec in session.run(query)]
+    # Skip edges touching nodes that carry none of the known label properties.
+    relationships = [pair for pair in pairs if None not in pair]
+    G = nx.Graph()
+    G.add_edges_from(relationships)
+    plt.figure(figsize=(10, 6))
+    nx.draw(G, with_labels=True, node_color="lightblue", edge_color="gray", node_size=2000, font_size=10)
+    plt.title("Knowledge Graph")
+    plt.savefig("knowledge_graph.png")
+    plt.close()  # release the figure once it has been written
+    print("Knowledge graph saved as knowledge_graph.png")
+
+if __name__ == "__main__":  # interactive entry point, only when executed as a script
+    dir_path = input("Enter the directory path to process: ")
+    process_files(dir_path)  # walk the directory and store what is extracted
+    generate_knowledge_graph()  # then render the stored graph to knowledge_graph.png
\ No newline at end of file
diff --git a/pull_model.Dockerfile b/pull_model.Dockerfile
index b06625f7..ca1fcbb0 100644
--- a/pull_model.Dockerfile
+++ b/pull_model.Dockerfile
@@ -15,31 +15,32 @@ COPY <<EOF pull_model.clj
   (let [llm (get (System/getenv) "LLM")
         url (get (System/getenv) "OLLAMA_BASE_URL")]
     (println (format "pulling ollama model %s using %s" llm url))
+
     (if (and llm 
-         url 
-         (not (#{"gpt-4" "gpt-3.5" "claudev2" "gpt-4o" "gpt-4-turbo"} llm))
-         (not (some #(.startsWith llm %) ["ai21.jamba-instruct-v1:0"
-                                          "amazon.titan"
-                                          "anthropic.claude"
-                                          "cohere.command"
-                                          "meta.llama"
-                                          "mistral.mi"])))
-
-      ;; ----------------------------------------------------------------------
-      ;; just call `ollama pull` here - create OLLAMA_HOST from OLLAMA_BASE_URL
-      ;; ----------------------------------------------------------------------
-      ;; TODO - this still doesn't show progress properly when run from docker compose
+             url 
+             (not (#{\"gpt-4\" \"gpt-3.5\" \"claudev2\" \"gpt-4o\" \"gpt-4-turbo\"} llm))
+             (not (some #(.startsWith llm %) 
+                         [\"ai21.jamba-instruct-v1:0\"
+                          \"amazon.titan\"
+                          \"anthropic.claude\"
+                          \"cohere.command\"
+                          \"meta.llama\"
+                          \"mistral.mi\"])))
 
       (let [done (async/chan)]
         (async/go-loop [n 0]
           (let [[v _] (async/alts! [done (async/timeout 5000)])]
-            (if (= :stop v) :stopped (do (println (format "... pulling model (%ss) - will take several minutes" (* n 10))) (recur (inc n))))))
-        (process/shell {:env {"OLLAMA_HOST" url "HOME" (System/getProperty "user.home")} :out :inherit :err :inherit} (format "bash -c './bin/ollama show %s --modelfile > /dev/null || ./bin/ollama pull %s'" llm llm))
+            (if (= :stop v) :stopped 
+                (do (println (format "... pulling model (%ss) - will take several minutes" (* n 10))) 
+                    (recur (inc n))))))
+
+        (process/shell {:env {"OLLAMA_HOST" url 
+                              "HOME" (System/getProperty "user.home")} 
+                        :out :inherit :err :inherit} 
+                       (format "bash -c './bin/ollama show %s --modelfile > /dev/null || ./bin/ollama pull %s'" llm llm))
+
         (async/>!! done :stop))
 
       (println "OLLAMA model only pulled if both LLM and OLLAMA_BASE_URL are set and the LLM model is not gpt")))
   (catch Throwable _ (System/exit 1)))
 EOF
-
-ENTRYPOINT ["bb", "-f", "pull_model.clj"]
-
diff --git a/pull_model.clj b/pull_model.clj
new file mode 100644
index 00000000..5251db18
--- /dev/null
+++ b/pull_model.clj
@@ -0,0 +1,35 @@
+(ns pull-model
+  (:require [babashka.process :as process]
+            [clojure.core.async :as async]))
+
+;; Script body: pull the Ollama model named by the LLM environment variable
+;; from the server given by OLLAMA_BASE_URL. Nothing is pulled when either
+;; variable is unset or when LLM names one of the excluded models / model
+;; prefixes listed below. Any failure is reported on stderr and the script
+;; exits with status 1.
+(try
+  (let [llm (get (System/getenv) "LLM")
+        url (get (System/getenv) "OLLAMA_BASE_URL")
+        ;; Interval between progress messages. The elapsed time printed in
+        ;; each message is derived from this value so the two cannot drift.
+        progress-interval-ms 5000]
+    (println (format "pulling ollama model %s using %s" llm url))
+    (if (and llm
+             url
+             (not (#{"gpt-4" "gpt-3.5" "claudev2" "gpt-4o" "gpt-4-turbo"} llm))
+             (not (some #(.startsWith llm %) ["ai21.jamba-instruct-v1:0"
+                                              "amazon.titan"
+                                              "anthropic.claude"
+                                              "cohere.command"
+                                              "meta.llama"
+                                              "mistral.mi"])))
+
+      ;; ----------------------------------------------------------------------
+      ;; just call `ollama pull` here - create OLLAMA_HOST from OLLAMA_BASE_URL
+      ;; ----------------------------------------------------------------------
+      ;; TODO - this still doesn't show progress properly when run from docker compose
+
+      (let [done (async/chan)]
+        ;; Heartbeat: print a progress line each interval until :stop arrives
+        ;; on `done`. n counts completed intervals, so it starts at 1.
+        (async/go-loop [n 1]
+          (let [[v _] (async/alts! [done (async/timeout progress-interval-ms)])]
+            (if (= :stop v)
+              :stopped
+              (do (println (format "... pulling model (%ss) - will take several minutes"
+                                   (quot (* n progress-interval-ms) 1000)))
+                  (recur (inc n))))))
+
+        ;; Pull only when `ollama show` fails for the model.
+        (process/shell {:env {"OLLAMA_HOST" url "HOME" (System/getProperty "user.home")} :out :inherit :err :inherit}
+                       (format "bash -c './bin/ollama show %s --modelfile > /dev/null || ./bin/ollama pull %s'" llm llm))
+        (async/>!! done :stop))
+
+      (println "OLLAMA model only pulled if both LLM and OLLAMA_BASE_URL are set and the LLM model is not gpt")))
+
+  (catch Throwable e
+    ;; Report the cause before exiting instead of failing silently.
+    (binding [*out* *err*]
+      (println (format "failed to pull ollama model: %s" (.getMessage e))))
+    (System/exit 1)))