main.py

# -*- coding: utf-8 -*-
"""pipeline.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1or18eWRoJialGcp64RLnJCS3GftlzBo8
"""

import torchvision
print(torchvision.__version__)

import subprocess
import sys
import asyncio
# import boto3
import clip
import cv2
# import dlib
import ffmpeg
import getpass
import glob
import io
import json
import logging
import matplotlib.pyplot as plt
import nest_asyncio
import numpy as np
import openai
import os
import pandas as pd
import random
import re
import requests
import shutil
import speech_recognition as sr
import tensorflow as tf
import tensorflow_hub as hub
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms
import yt_dlp
from diffusers import (StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipeline,
                       StableDiffusionInstructPix2PixPipeline, StableDiffusionPipeline,
                       EulerAncestralDiscreteScheduler, LMSDiscreteScheduler)
# from google.colab import output
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseUpload
from IPython.display import Markdown, display
from io import BytesIO
# from langchain import OpenAI, LLMChain, PromptTemplate
from moviepy.editor import VideoFileClip
from pathlib import Path
from PIL import Image
# from pyannote.audio import Pipeline
from pydub import AudioSegment
from pytube import YouTube
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from telegram import Update
from telegram.ext import Application, CommandHandler, MessageHandler, CallbackContext, filters
from transformers import (AutoModelForCausalLM, AutoProcessor, AutoTokenizer, BlipForConditionalGeneration,
                          BlipProcessor, CLIPModel, CLIPProcessor, CLIPSegForImageSegmentation,
                          CLIPSegProcessor, CLIPTokenizer, VisionEncoderDecoderModel,
                          ViTFeatureExtractor, pipeline, ViTImageProcessor, T5ForConditionalGeneration, T5Tokenizer)
from ultralytics import YOLOWorld

# Set the TF_ENABLE_ONEDNN_OPTS environment variable to '0' for this process.
# NOTE(review): tensorflow is already imported above; presumably this flag has
# to be set before that import to have any effect — confirm and move if so.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

"""# Load API keys"""

def load_api_keys(api_key_file_path):
    """Parse a ``NAME=value`` text file into a dictionary.

    Lines without an ``=`` are ignored. Each remaining line is split at its
    first ``=``; surrounding whitespace is removed from both the name and the
    value. When a name occurs more than once, the last occurrence wins.

    Args:
        api_key_file_path: Path of the text file to read.

    Returns:
        dict mapping each key name to its value (both ``str``).
    """
    with open(api_key_file_path, 'r') as handle:
        # partition() splits at the first '=' only, so values may contain '='.
        split_lines = (
            raw_line.strip().partition('=')
            for raw_line in handle
            if '=' in raw_line
        )
        return {name.strip(): secret.strip() for name, _sep, secret in split_lines}

# Read the API keys once at import time from a file next to the working
# directory. This raises if './api_keys.txt' cannot be opened, so the module
# cannot be imported without it.
api_key_file_path = './api_keys.txt'
api_keys = load_api_keys(api_key_file_path)

def build_youtube_client(api_key):
    """Create the ``youtube`` / ``v3`` API service object using ``api_key`` as developer key."""
    youtube_client = build('youtube', 'v3', developerKey=api_key)
    return youtube_client

def initialize_openai(api_key):
    """Set the module-level ``openai.api_key`` attribute to ``api_key``."""
    setattr(openai, 'api_key', api_key)

def initialize_stability_api(api_key):
    """Return the Stability API key unchanged.

    No client object is created here; the function simply hands the key back
    to the caller.
    """
    stability_key = api_key
    return stability_key

# Load the video table from an Excel workbook at import time, using the
# workbook's first column as the DataFrame index, and print the first rows
# as a sanity check.
# NOTE(review): the path is an absolute, machine-specific Google Drive
# location; importing this module elsewhere will fail here.
excel_file = r'G:\My Drive\Capstone\DataFrame\videos_df_final v20.xlsx'
videos_df = pd.read_excel(excel_file, index_col=0)
print(videos_df.head())

# Loaded CLIP (model, preprocess) pairs keyed by device string, so the
# checkpoint is read from disk at most once per device per process.
_CLIP_MODEL_CACHE = {}


def _get_clip_model(device):
    """Return the cached CLIP ViT-B/32 ``(model, preprocess)`` pair for ``device``."""
    if device not in _CLIP_MODEL_CACHE:
        _CLIP_MODEL_CACHE[device] = clip.load("ViT-B/32", device=device)
    return _CLIP_MODEL_CACHE[device]


def generate_query_embeddings(image, query_text, max_length=100):
    """Embed a query image and a query text with CLIP ViT-B/32.

    Args:
        image: Image accepted by the CLIP ``preprocess`` transform
            (presumably a PIL image — confirm against callers).
        query_text: Text to embed. Only its first ``max_length`` characters
            are used.
        max_length: Maximum number of characters of ``query_text`` to keep.
            Defaults to 100, the value that was previously hard-coded.

    Returns:
        Tuple ``(query_text_embedding, query_image_embedding)`` — note the
        text embedding comes first. Both are NumPy arrays produced from a
        batch of one item.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Reuse the already-loaded model instead of reloading it on every call.
    model, preprocess = _get_clip_model(device)

    # Generate embedding for the query image
    query_image = preprocess(image).unsqueeze(0).to(device)
    with torch.no_grad():
        query_image_embedding = model.encode_image(query_image).cpu().numpy()

    query_text = query_text[:max_length]

    # Generate embedding for the query text. The character cap above does not
    # bound the token count, so let the tokenizer truncate instead of raising
    # when the text exceeds CLIP's context length.
    query_text_token = clip.tokenize([query_text], truncate=True).to(device)
    with torch.no_grad():
        query_text_embedding = model.encode_text(query_text_token).cpu().numpy()

    return query_text_embedding, query_image_embedding

def search_videos(query_embedding, videos_df, category, top_k=1):
    """Return the short video in ``category`` most similar to ``query_embedding``.

    Rows of ``videos_df`` are kept when their ``category`` equals ``category``
    and their ``duration`` is below 60. Each remaining row's
    ``combined_embedding`` is compared with the query by cosine similarity.
    Rows whose embedding is not a list/ndarray, or contains NaN, score 0.

    Args:
        query_embedding: Query vector. It is flattened to a single row, so
            both a 1-D vector and a ``(1, d)`` array are accepted.
        videos_df: DataFrame with at least the columns ``category``,
            ``duration``, ``video_id``, ``title`` and ``combined_embedding``.
        category: Category value to match exactly.
        top_k: Accepted for interface compatibility; only the single best
            match is returned.

    Returns:
        The best-matching row of the filtered DataFrame (a ``pandas.Series``).

    Raises:
        ValueError: If no row passes the category and duration filters.
    """
    category_videos_df = videos_df[videos_df['category'] == category]
    filtered_videos_df = category_videos_df[category_videos_df['duration'] < 60]

    # Debug output: sizes and contents of both filter stages.
    print(f"Number of videos in the '{category}' category: {len(category_videos_df)}")
    print(f"Number of videos in the '{category}' category with duration < 60 seconds: {len(filtered_videos_df)}")

    print(f"Video IDs and titles in the '{category}' category:")
    for video_id, title in zip(category_videos_df['video_id'], category_videos_df['title']):
        print(f"Video ID: {video_id}, Title: {title}")

    print(f"Video IDs and titles in the '{category}' category with duration < 60 seconds:")
    for video_id, title in zip(filtered_videos_df['video_id'], filtered_videos_df['title']):
        print(f"Video ID: {video_id}, Title: {title}")

    # argmax over an empty list raises an opaque ValueError; fail with a
    # message that names the actual cause instead.
    if filtered_videos_df.empty:
        raise ValueError(
            f"No videos with duration < 60 seconds found in category '{category}'"
        )

    # Flatten once so a (1, d) query works as well as a 1-D one.
    query_vector = np.asarray(query_embedding).reshape(1, -1)

    similarity_scores = []
    for embedding in filtered_videos_df['combined_embedding']:
        score = 0  # default for missing / malformed / NaN embeddings
        if isinstance(embedding, (list, np.ndarray)):
            candidate = np.asarray(embedding)
            if not np.isnan(candidate).any():
                score = cosine_similarity(query_vector, candidate.reshape(1, -1))[0][0]
        similarity_scores.append(score)

    top_index = int(np.argmax(similarity_scores))
    return filtered_videos_df.iloc[top_index]

async def get_video_file(video_data, video_directory=None):
    """Locate the downloaded file for a video inside its category folder.

    The file is searched in ``<video_directory>/<category>`` and must have a
    name that starts with the video id.

    Args:
        video_data: Mapping providing ``'category'`` and ``'video_id'``.
        video_directory: Root folder that contains one sub-folder per
            category. Defaults to the original hard-coded Google Drive
            location when omitted.

    Returns:
        Path of the first matching file, or ``None`` when the category folder
        does not exist or contains no matching file.
    """
    if video_directory is None:
        video_directory = "G:\\My Drive\\Capstone\\Shorts\\"
    category_folder = video_data['category']
    video_id = video_data['video_id']

    print(f"Video Directory: {video_directory}")
    print(f"Category Folder: {category_folder}")
    print(f"Video ID: {video_id}")

    category_directory = os.path.join(video_directory, category_folder)
    print(f"Category Directory: {category_directory}")

    # isdir (not exists) so a stray file with the category's name is reported
    # as a missing directory instead of making listdir raise.
    if os.path.isdir(category_directory):
        for filename in os.listdir(category_directory):
            if filename.startswith(video_id):
                video_path = os.path.join(category_directory, filename)
                print(f"Found video file: {video_path}")  # Debugging print
                return video_path
    else:
        print(f"Category directory does not exist: {category_directory}")  # Debugging print

    print("Video file not found for video_id:", video_id)  # Debugging print
    return None

async def generate_recommendation_reason(video_data, customer_input):
    """Ask the chat model why a video is a good recommendation for the customer.

    The user message lists a fixed set of fields read from ``video_data``
    followed by the raw ``customer_input``.

    NOTE(review): declared ``async`` but the OpenAI call below is made
    synchronously, so it blocks until the response arrives.

    Args:
        video_data: Mapping with the keys listed in ``labelled_fields`` below.
        customer_input: Free-text input from the customer.

    Returns:
        The stripped text of the first completion choice.
    """
    system_prompt = "You are an AI assistant that provides a reason for recommending a video based on its textual data and the customer's input. Customer is an e-commerce seller on TikTok Shop or Shopee in Indonesia"

    # (label shown in the prompt, key read from video_data), in prompt order.
    labelled_fields = [
        ("Video ID", "video_id"),
        ("Title", "title"),
        ("Caption", "captions"),
        ("Description", "description"),
        ("Publish Time", "publish_time"),
        ("Views", "views"),
        ("Likes", "likes"),
        ("Dislikes", "dislikes"),
        ("Comment Count", "comment_counts"),
        ("Duration", "duration"),
        ("Comments", "comments"),
        ("Category", "category"),
        ("COT", "COT"),
        ("Transcript", "transcription"),
        ("Analysis", "analysis"),
    ]
    video_section = "\n".join(
        f"{label}: {video_data[key]}" for label, key in labelled_fields
    )
    prompt = (
        "Based on the following video data and customer input, provide a reason why this video is a good recommendation:\n\n"
        f"Video Data:\n{video_section}\n\n"
        f"Customer Input:\n{customer_input}"
    )

    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    completion = openai.chat.completions.create(
        model="gpt-4-turbo",
        messages=chat_messages,
        max_tokens=800,
        n=1,
        temperature=0.6,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content.strip()

def extract_product_from_image(image):
    """Caption ``image`` with the BLIP base captioning checkpoint.

    The processor and model are loaded from
    ``Salesforce/blip-image-captioning-base`` on every call.

    Args:
        image: Image in a form accepted by ``BlipProcessor``.

    Returns:
        The generated caption with special tokens removed.
    """
    checkpoint = "Salesforce/blip-image-captioning-base"
    blip_processor = BlipProcessor.from_pretrained(checkpoint)
    blip_model = BlipForConditionalGeneration.from_pretrained(checkpoint)

    # Tensorise the image, generate token ids, then decode the first sequence.
    model_inputs = blip_processor(image, return_tensors="pt")
    generated_ids = blip_model.generate(**model_inputs)
    return blip_processor.decode(generated_ids[0], skip_special_tokens=True)

def find_closest_category(product_description, videos_df):
    """Ask the chat model which known category best fits a product description.

    The distinct values of ``videos_df['category']`` are offered to the model.
    Because the model answers in free text, the answer is mapped back to a
    known category label when one is contained in it (case-insensitively), so
    the result can be used for exact-match filtering on the same column.

    Args:
        product_description: Text describing the product.
        videos_df: DataFrame with a ``category`` column.

    Returns:
        The matching category label from ``videos_df`` when the answer
        mentions one (the longest match wins); otherwise the model's stripped
        answer unchanged.
    """
    # Drop missing values and stringify: str.join raises on NaN / non-strings.
    labels = list(videos_df['category'].dropna().unique())
    names = [str(label) for label in labels]

    # Prepare the prompt for OpenAI
    system_prompt = "You are an AI assistant that identifies the closest category for a given product description."
    prompt = f"Given the following product description, identify the closest category from the list of categories:\n\nProduct Description: {product_description}\nCategories: {', '.join(names)}, answer succinctly"

    response = openai.chat.completions.create(
        model="gpt-4-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ],
        max_tokens=50,
        n=1,
        temperature=0.6,
    )

    answer = response.choices[0].message.content.strip()

    # Normalise answers such as "Fashion And Apparel." or "The closest
    # category is fashion and apparel" back to the exact stored label.
    folded_answer = answer.casefold()
    candidates = [
        (name, label)
        for name, label in zip(names, labels)
        if name and name.casefold() in folded_answer
    ]
    if candidates:
        # Prefer the most specific (longest) label when several are contained.
        return max(candidates, key=lambda pair: len(pair[0]))[1]
    return answer

# Pick the torch device once at import time. In the visible part of this file
# the module-level value is only printed; generate_query_embeddings computes
# its own local device.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Everything below runs at import time and may download model weights.
print("Loading pre-trained models...")

# Load the pre-trained Stable Diffusion Instruct Pix2Pix model
# (disabled: kept commented out)
#pix2pix_model_id = "timbrooks/instruct-pix2pix"
#pix2pix_pipeline = StableDiffusionInstructPix2PixPipeline.from_pretrained(pix2pix_model_id, torch_dtype=torch.float16)
#pix2pix_pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(pix2pix_pipeline.scheduler.config)
#pix2pix_pipeline.to(device)

# Load the pre-trained Stable Diffusion model
# (disabled: kept commented out)
#stable_diffusion_model_id = "CompVis/stable-diffusion-v1-4"
#stable_diffusion_pipeline = StableDiffusionPipeline.from_pretrained(stable_diffusion_model_id, torch_dtype=torch.float16)
#stable_diffusion_pipeline.to(device)

# Load the YOLO v5 object detection model (the 'yolov5s' variant via
# torch.hub). detect_objects calls this module-level model.
yolo_model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

# Load the image-to-text model (BLIP base captioning). Used as
# image_to_text(img)[0]["generated_text"] by generate_video_frames.
image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")

# Load the text similarity model: a feature-extraction pipeline whose
# output is indexed as [0][0] by generate_video_frames to get one vector.
text_similarity_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniLM-L6-v2")

# Load the image embedding model
# (disabled: CLIP is loaded through initialize_image_embedding_model instead)
# image_embedding = pipeline("image-feature-extraction", model="openai/clip-vit-base-patch32")

print("Models loaded successfully!")

# System prompt used by generate_prompt: a list of guidelines for turning an
# object description plus the memory table into a short image prompt.
# NOTE(review): despite the name, the text holds instructions only and no
# worked examples.
few_shot_learnings = '''
    Generate a detailed prompt for an image based on the given object description and the memory dataframe. The prompt should follow these guidelines:

    * Be specific: Describe the image's subject and context, and add context that relates to the page's topic
    * Keep it short: Keep the prompt under 125 characters
    * Avoid starting with "picture of..." or "Image of...": Jump right into the image's description
    * Use keywords sparingly: Don't cram keywords into every image's prompt
    * Review for spelling errors: Make sure your prompt is error free
    * Don't mention any humans or people in the prompt, focus only on the objects and products
    * Mention the color of the objects and products in the prompt
    * Consider the previous frames and object descriptions from the memory dataframe to maintain consistency
    * Describe the object as detailed as possible
    '''

def send_generation_request(host, params):
    """POST a multipart generation request and return the HTTP response.

    ``params`` may contain two special entries that are sent as file parts
    instead of form fields:

    * ``"image"``: a non-empty file path, or a PIL image (sent as PNG bytes).
    * ``"mask"``: a non-empty file path.

    All other entries are sent as form data. When no file part is present a
    placeholder part named ``"none"`` is sent instead.

    The bearer token is read from the module-level ``STABILITY_API_KEY``,
    which must be defined before this function is called.

    Args:
        host: URL to post to.
        params: Request parameters. The caller's dict is left unmodified.

    Returns:
        The ``requests`` response object for a successful request.

    Raises:
        Exception: If the response status is not OK; the message carries the
            status code and response text.
    """
    from contextlib import ExitStack

    headers = {
        "Accept": "image/*",
        "Authorization": f"Bearer {STABILITY_API_KEY}"
    }

    # Work on a shallow copy so popping the file entries does not alter the
    # dict owned by the caller.
    form_fields = dict(params)
    image = form_fields.pop("image", None)
    mask = form_fields.pop("mask", None)

    # ExitStack closes every file opened below, even if the request raises.
    with ExitStack() as open_files:
        files = {}
        if image is not None:
            if isinstance(image, str) and image != '':
                files["image"] = open_files.enter_context(open(image, 'rb'))
            elif isinstance(image, Image.Image):
                image_bytes = BytesIO()
                image.save(image_bytes, format='PNG')
                image_bytes.seek(0)
                files["image"] = image_bytes
        if mask is not None and mask != '':
            files["mask"] = open_files.enter_context(open(mask, 'rb'))
        if not files:
            files["none"] = ''

        # Send request
        print(f"Sending REST request to {host}...")
        response = requests.post(
            host,
            headers=headers,
            files=files,
            data=form_fields
        )

    if not response.ok:
        raise Exception(f"HTTP {response.status_code}: {response.text}")
    return response

def detect_objects(image):
    """Return bounding boxes of class-0 detections found by the YOLO model.

    Only detections whose last element equals 0 are kept (class 0 represents
    person), so despite the generic name this returns person boxes only.

    Args:
        image: Image passed straight to the module-level ``yolo_model``.

    Returns:
        List of ``[x, y, width, height]`` integer lists, converted from the
        model's corner coordinates.
    """
    detections = yolo_model(image).xyxy[0]

    person_boxes = []
    for det in detections:
        if det[-1] != 0:  # not class 0, skip
            continue
        left, top, right, bottom, _score, _cls = det
        person_boxes.append([int(left), int(top), int(right - left), int(bottom - top)])
    return person_boxes

def generate_prompt(memory_df, object_description):
    """Ask the chat model for an image prompt based on memory and a description.

    The system message is the module-level ``few_shot_learnings`` text; the
    user message contains ``memory_df`` rendered as a string (without the
    index) and the object description.

    Args:
        memory_df: DataFrame of previously seen objects.
        object_description: Caption of the object to describe.

    Returns:
        The stripped text of the first completion choice.
    """
    user_message = (
        f"Memory:\n{memory_df.to_string(index=False)}"
        f"\n\nObject Description: {object_description}"
    )
    chat_messages = [
        {"role": "system", "content": few_shot_learnings},
        {"role": "user", "content": user_message},
    ]

    completion = openai.chat.completions.create(
        model="gpt-4-turbo",
        messages=chat_messages,
        max_tokens=150,
        n=1,
        temperature=0.7,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()

def evaluate_similarity(memory_df, bounding_boxes):
    """Average the ``Similarity`` of the rows added for the current frame.

    The last ``len(bounding_boxes)`` rows of ``memory_df`` are taken to be the
    rows of the current frame (one per bounding box).

    Args:
        memory_df: DataFrame with a ``Similarity`` column.
        bounding_boxes: Bounding boxes processed for the current frame; only
            their count is used.

    Returns:
        Mean similarity of those rows, or NaN when there are no bounding
        boxes (no objects were compared for this frame).
    """
    object_count = len(bounding_boxes)
    if object_count == 0:
        # iloc[-0:] would select the WHOLE frame and silently average every
        # row in memory instead of "no rows".
        return float("nan")
    return memory_df["Similarity"].iloc[-object_count:].mean()

def visualize_similarity(similarity_scores):
    """Plot similarity score against frame index and save the figure as PNG.

    The figure is written to a fixed path under the project's Google Drive
    folder and then closed.

    Args:
        similarity_scores: Sequence of scores, one per frame.
    """
    similarity_plot_path = "G:\\My Drive\\Capstone\\Agents\\similarity_scores.png"
    frame_indices = range(len(similarity_scores))

    # Draw the line chart
    plt.figure(figsize=(8, 6))
    plt.plot(frame_indices, similarity_scores)
    plt.xlabel("Frame")
    plt.ylabel("Similarity Score")
    plt.title("Similarity Score over Frames")
    plt.tight_layout()

    # Persist and release the figure
    plt.savefig(similarity_plot_path)
    plt.close()
    print(f"Similarity score plot saved: {similarity_plot_path}")

def initialize_image_embedding_model():
    """Load the CLIP ``openai/clip-vit-base-patch32`` model and its processor.

    Returns:
        Tuple ``(model, processor)``.
    """
    checkpoint = "openai/clip-vit-base-patch32"
    clip_model = CLIPModel.from_pretrained(checkpoint)
    clip_processor = CLIPProcessor.from_pretrained(checkpoint)
    return clip_model, clip_processor

def first_image_embedding(image, model, processor):
    """Compute a flat NumPy embedding of ``image`` with the given model.

    Args:
        image: Image passed to ``processor`` via ``images=``.
        model: Object exposing ``get_image_features(**inputs)``.
        processor: Callable producing the model inputs as PyTorch tensors.

    Returns:
        1-D NumPy array: the image features detached from the graph and
        flattened.
    """
    model_inputs = processor(images=image, return_tensors="pt")
    features = model.get_image_features(**model_inputs)
    return features.detach().numpy().flatten()

def print_video_details(video_path, description):
    """Print frame count, FPS and frame size of a video, prefixed by ``description``.

    Prints a failure message instead when the video cannot be opened.
    """
    capture = cv2.VideoCapture(video_path)
    if capture.isOpened():
        frame_total = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_rate = capture.get(cv2.CAP_PROP_FPS)
        frame_width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print(f"{description} - Length (frames): {frame_total}, FPS: {frame_rate}, Width: {frame_width}, Height: {frame_height}")
        capture.release()
    else:
        print(f"Failed to open video: {video_path}")

def adjust_video_properties(source_path, target_path, target_fps, target_resolution):
    """Re-encode a video with OpenCV at a new frame rate and resolution.

    Every frame of ``source_path`` is resized to ``target_resolution`` and
    written to ``target_path`` with the ``mp4v`` codec at ``target_fps``.

    NOTE(review): a second function with this same name is defined further
    down in this file (ffmpeg-based); that later definition replaces this one
    once the module has finished loading.
    """
    reader = cv2.VideoCapture(source_path)
    codec = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(target_path, codec, target_fps, target_resolution)

    # Copy frames until the reader reports no more data.
    has_frame, frame = reader.read()
    while has_frame:
        writer.write(cv2.resize(frame, target_resolution))
        has_frame, frame = reader.read()

    reader.release()
    writer.release()

def clear_folder(folder_path):
    """Empty ``folder_path`` without removing the folder itself.

    Files and symbolic links are unlinked; sub-directories are removed
    recursively. A failure on one entry is printed and does not stop the
    remaining entries from being processed.
    """
    for entry_name in os.listdir(folder_path):
        entry_path = os.path.join(folder_path, entry_name)
        try:
            if os.path.isfile(entry_path) or os.path.islink(entry_path):
                # Plain file or link: remove just this entry
                os.unlink(entry_path)
            elif os.path.isdir(entry_path):
                # Real directory: remove it with everything inside
                shutil.rmtree(entry_path)
        except Exception as e:
            print(f"Failed to delete {entry_path}. Reason: {e}")

# Initialize the image embedding model once at import time. Both objects are
# module-level globals and are also passed explicitly into
# generate_video_frames by process_video_generation.
image_embedding_model, image_embedding_processor = initialize_image_embedding_model()

def generate_video_frames(image_path, num_seconds, image_embedding_model, image_embedding_processor, text_similarity_model):
    """Generate one frame per second by re-rendering detected regions of a first frame.

    Steps:

    1. Caption the input image, turn the caption into a prompt and request a
       first frame from the Stability image API (image-to-image).
    2. Run ``detect_objects`` on that first frame to get bounding boxes.
    3. For every second ``1..num_seconds``: copy the first frame, and for each
       bounding box caption and embed the cropped region, compare it against
       every row of the memory table (mean of text and image cosine
       similarity), append it to memory, and — when the best similarity is
       below 0.7 — generate a replacement image that is pasted into the box.
    4. Save each frame as ``frame_<second>.jpg`` in a fixed folder and finally
       plot the per-frame similarity scores.

    Relies on the module-level ``image_to_text`` pipeline and on
    ``generate_prompt``, ``send_generation_request``, ``detect_objects``,
    ``first_image_embedding``, ``evaluate_similarity`` and
    ``visualize_similarity`` from this file.

    Args:
        image_path: Path of the input image.
        num_seconds: Number of frames to generate after the first one.
        image_embedding_model: Model passed to ``first_image_embedding``.
        image_embedding_processor: Processor passed to ``first_image_embedding``.
        text_similarity_model: Callable whose result is indexed ``[0][0]`` to
            obtain one embedding vector for a text.

    Returns:
        Tuple ``(frames, similarity_scores)``: ``frames`` is a list of BGR
        arrays starting with the first frame; ``similarity_scores`` starts
        with ``1.0`` followed by one score per generated frame.
    """
    print(f"\nGenerating video frames for {num_seconds} seconds...")

    # Load the input image using PIL
    input_image = Image.open(image_path)
    print("Input image loaded.")

    # Generate a caption for the input image
    input_image_caption = image_to_text(input_image)[0]["generated_text"]
    print(f"Input Image Caption: {input_image_caption}")

    # Generate the initial prompt (no memory yet, hence the empty DataFrame)
    initial_prompt = generate_prompt(pd.DataFrame(), input_image_caption)
    print(f"Initial Prompt: {initial_prompt}")

    # Generate the first frame using Stability AI API based on the initial prompt
    print("Generating the first frame...")
    host = "https://api.stability.ai/v2beta/stable-image/generate/sd3"
    params = {
        "image": input_image,
        "prompt": initial_prompt,
        "strength": 1,
        "seed": 10,
        "output_format": "jpeg",
        "mode": "image-to-image",
        "model": "sd3-turbo"
    }
    response = send_generation_request(host, params)

    first_frame = Image.open(BytesIO(response.content))
    print("First frame generated.")
    display(first_frame)

    # Convert the first frame to OpenCV format
    first_frame_cv = cv2.cvtColor(np.array(first_frame), cv2.COLOR_RGB2BGR)

    print("First frame processed and displayed.")

    # Perform object detection on the first frame; the same boxes are reused
    # for every later frame.
    print("Performing object detection on the first frame...")
    bounding_boxes = detect_objects(first_frame_cv)
    print(f"Detected {len(bounding_boxes)} objects in the first frame.")

    input_image_embedding = first_image_embedding(input_image, image_embedding_model, image_embedding_processor)

    # Initialize the memory dataframe with the input image and its caption
    memory_df = pd.DataFrame({
        "Frame": [0],
        "Object": [input_image],
        "Caption": [input_image_caption],
        "Caption Embedding": [text_similarity_model(input_image_caption)[0][0]],
        "Image Embedding": [input_image_embedding],
        "Similarity": [1.0],
        "Prompt": [initial_prompt]
    })

    # First frame has a similarity score of 1.0 with the input image
    similarity_scores = [1.0]

    # Regions scoring below this against memory are regenerated.
    similarity_threshold = 0.7

    frames = [first_frame_cv]
    for second in range(1, num_seconds + 1):  # Inclusive range for the number of seconds
        print(f"\nGenerating frame {second}...")
        frame = first_frame_cv.copy()

        for bbox in bounding_boxes:
            x, y, width, height = bbox

            # Crop the object from the frame
            cropped_object = frame[y:y+height, x:x+width]

            # Convert the cropped object to PIL image
            cropped_object_pil = Image.fromarray(cv2.cvtColor(cropped_object, cv2.COLOR_BGR2RGB))

            # Generate image-to-text description of the cropped object
            object_description = image_to_text(cropped_object_pil)[0]["generated_text"]
            print(f"Object Description: {object_description}")

            # Embed the description once; it is reused for every memory row,
            # for the new memory row and for the post-generation check.
            description_embedding = text_similarity_model(object_description)[0][0]

            # Generate image embedding for the cropped object
            cropped_object_embedding = first_image_embedding(cropped_object_pil, image_embedding_model, image_embedding_processor)

            # Compare against every remembered object: mean of text and image
            # cosine similarity. memory_df always has at least the initial
            # row and always carries both embedding columns.
            similarities = []
            for _, row in memory_df.iterrows():
                text_similarity = cosine_similarity([description_embedding], [row["Caption Embedding"]])[0][0]
                image_similarity = cosine_similarity([cropped_object_embedding], [row["Image Embedding"]])[0][0]
                similarities.append((text_similarity + image_similarity) / 2)

            max_similarity = max(similarities)
            print(f"Max Similarity Score: {max_similarity}")

            # Update memory dataframe
            new_row = pd.DataFrame({
                "Frame": [second],
                "Object": [cropped_object_pil],
                "Caption": [object_description],
                "Caption Embedding": [description_embedding],
                "Image Embedding": [cropped_object_embedding],
                "Similarity": [max_similarity],
                "Prompt": ""
            })
            memory_df = pd.concat([memory_df, new_row], ignore_index=True)

            # Keep the crop unless its similarity is below the threshold
            generated_image = cropped_object_pil
            if max_similarity < similarity_threshold:
                print("Similarity below threshold. Generating new prompt.")
                print("Memory:")
                print(memory_df)
                prompt = generate_prompt(memory_df, object_description)

                # Update the prompt in the memory dataframe (row just appended)
                memory_df.at[len(memory_df) - 1, "Prompt"] = prompt

                print(f"Stability AI Prompt:\n{prompt}")

                # Generate an image with the updated prompt using Stability AI API.
                # Only the first second conditions on the crop; later seconds
                # use text-to-image.
                print("Generating image using Stability AI API...")
                host = "https://api.stability.ai/v2beta/stable-image/generate/sd3"
                params = {
                    "prompt": prompt,
                    "strength": 1,
                    "seed": 10,
                    "output_format": "jpeg",
                    "mode": "image-to-image" if second == 1 else "text-to-image",
                    "model": "sd3-turbo"
                }
                if second == 1:
                    params["image"] = cropped_object_pil
                response = send_generation_request(host, params)
                generated_image = Image.open(BytesIO(response.content))
                print("Image generated.")

                # Display the generated image
                display(generated_image)

                # Generate image-to-text description of the generated image
                generated_image_description = image_to_text(generated_image)[0]["generated_text"]

                # Generate image embedding for the generated image
                generated_image_embedding = first_image_embedding(generated_image, image_embedding_model, image_embedding_processor)

                # Re-score the generated image against the original crop.
                # NOTE(review): this value is not written back to memory_df,
                # so evaluate_similarity still sees the pre-generation score —
                # confirm whether that is intended.
                text_similarity_score = cosine_similarity([description_embedding], [text_similarity_model(generated_image_description)[0][0]])[0][0]
                image_similarity_score = cosine_similarity([cropped_object_embedding], [generated_image_embedding])[0][0]
                max_similarity = (text_similarity_score + image_similarity_score) / 2

            # The API image generally does not have the crop's size; resize it
            # to the actual region so the slice assignment below cannot fail
            # with a shape mismatch.
            region_height, region_width = cropped_object.shape[:2]
            if generated_image.size != (region_width, region_height):
                generated_image = generated_image.resize((region_width, region_height), Image.Resampling.LANCZOS)

            # Paste the generated object back into the frame
            frame[y:y+height, x:x+width] = cv2.cvtColor(np.array(generated_image), cv2.COLOR_RGB2BGR)

        # Evaluate the similarity score of the generated frame
        similarity_score = evaluate_similarity(memory_df, bounding_boxes)
        similarity_scores.append(similarity_score)

        print(f"Similarity Score: {similarity_score}")

        frames.append(frame)

        # Save the generated frame
        frame_path = fr"G:\My Drive\Capstone\Agents\image\frame_{second}.jpg"
        cv2.imwrite(frame_path, frame)
        print(f"Generated frame saved: {frame_path}")

        # Build the frame visualization; the figure is closed without being
        # saved (the savefig call was disabled).
        plt.figure(figsize=(8, 6))
        plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        plt.title(f"Generated Frame {second}")
        plt.axis("off")
        plt.tight_layout()
        plt.close()
        print(f"Frame visualization saved: {frame_path}")

    visualize_similarity(similarity_scores)

    return frames, similarity_scores

import subprocess
# Absolute path of the ffmpeg binary used by the ffmpeg-based helpers below
# (create_video_from_image, first_stitch_videos, adjust_video_properties,
# second_stitch_videos). Machine-specific Windows path.
ffmpeg_executable = r'D:\Capstone (Vidia.AI)\Capstone (Vidia.AI)\ffmpeg\ffmpeg.exe'  # Use a raw string

def create_video_from_image(image_path, temp_video_path, duration=2, frame_rate=25):
    """Turn a still image into a short H.264 clip by running ffmpeg.

    The image is looped for ``duration`` seconds, scaled to 320x240 and
    written to ``temp_video_path`` (overwriting it). The command line, stdout
    and stderr of ffmpeg are printed.

    Args:
        image_path: Source image file.
        temp_video_path: Destination video file.
        duration: Value passed to ``-t``.
        frame_rate: Value passed to ``-framerate``.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero return code.
    """
    # Options placed before '-i' (paths are passed unquoted as list items)
    pre_input_args = ['-loop', '1', '-t', str(duration), '-i', image_path]
    post_input_args = [
        '-vf', 'scale=320x240,setsar=1',
        '-framerate', str(frame_rate),
        '-c:v', 'libx264',
        '-pix_fmt', 'yuv420p',
        '-y', temp_video_path,
    ]
    command = [ffmpeg_executable, *pre_input_args, *post_input_args]

    result = subprocess.run(command, capture_output=True, text=True)
    print(f"Creating video from {image_path}")
    print("Command:", ' '.join(command))
    print("stdout:", result.stdout)
    print("stderr:", result.stderr)

    if result.returncode == 0:
        return
    print(f"Error creating video from {image_path}. Error: {result.stderr}")
    raise RuntimeError(f"ffmpeg failed with error: {result.stderr}")

def first_stitch_videos(video_paths, output_video_path, frame_rate=25):
    """Concatenate videos (video stream only) into one H.264 file with ffmpeg.

    All inputs are fed to a single ``concat`` filter with ``a=0``, so no audio
    is concatenated. The command line, stdout and stderr are printed; the
    ffmpeg return code is not checked.

    Args:
        video_paths: Input video files, in playback order.
        output_video_path: Destination file (overwritten).
        frame_rate: Output frame rate passed to ``-r``.
    """
    # One '-i <path>' pair per input
    input_args = [token for path in video_paths for token in ('-i', path)]
    concat_filter = f'concat=n={len(video_paths)}:v=1:a=0[outv]'
    command = [
        ffmpeg_executable,
        *input_args,
        '-filter_complex', concat_filter,
        '-map', '[outv]',
        '-c:v', 'libx264',
        '-r', str(frame_rate),
        '-pix_fmt', 'yuv420p',
        '-y', output_video_path,
    ]

    result = subprocess.run(command, capture_output=True, text=True)
    print(f"Stitching videos into {output_video_path}")
    print("Command:", ' '.join(command))
    print("stdout:", result.stdout)
    print("stderr:", result.stderr)

def adjust_video_properties(input_video_path, output_video_path, target_fps, target_resolution):
    """Re-encode a video with ffmpeg at a new resolution and frame rate.

    The ffmpeg output is captured but not inspected, and the return code is
    not checked, so the final message is printed even when ffmpeg fails.

    Args:
        input_video_path: Source video file.
        output_video_path: Destination file (overwritten).
        target_fps: Output frame rate passed to ``-r``.
        target_resolution: ``(width, height)`` pair used in the scale filter.
    """
    out_width, out_height = target_resolution[0], target_resolution[1]
    scale_filter = f'scale={out_width}:{out_height}'
    command = [ffmpeg_executable, '-i', input_video_path]
    command += ['-vf', scale_filter, '-r', str(target_fps)]
    command += ['-c:v', 'libx264', '-pix_fmt', 'yuv420p']
    command += ['-y', output_video_path]

    subprocess.run(command, capture_output=True, text=True)
    print(f"Adjusted video saved to {output_video_path}")

def second_stitch_videos(video_paths, output_video_path, frame_rate=24, resolution=(768, 768)):
    """Normalise and concatenate videos (video stream only) with ffmpeg.

    Each input is scaled to ``resolution``, given a sample aspect ratio of 1
    and converted to ``frame_rate`` before all streams are concatenated into
    one H.264 file. The command line, stdout and stderr are printed; the
    return code is not checked.

    Args:
        video_paths: Input video files, in playback order.
        output_video_path: Destination file (overwritten).
        frame_rate: Frame rate applied to every input via the ``fps`` filter.
        resolution: ``(width, height)`` applied to every input.
    """
    input_args = []
    per_input_chains = []
    for index, path in enumerate(video_paths):
        input_args += ['-i', path]
        # Normalise stream <index> and label the result [v<index>]
        per_input_chains.append(
            f'[{index}:v]scale={resolution[0]}:{resolution[1]},setsar=1,fps=fps={frame_rate}[v{index}]'
        )

    clip_count = len(video_paths)
    stream_labels = ''.join(f'[v{index}]' for index in range(clip_count))
    filter_complex = ';'.join(per_input_chains) + f";{stream_labels}concat=n={clip_count}:v=1:a=0[outv]"

    command = [
        ffmpeg_executable,
        *input_args,
        '-filter_complex', filter_complex,
        '-map', '[outv]',
        '-c:v', 'libx264',
        '-pix_fmt', 'yuv420p',
        '-y', output_video_path,
    ]

    result = subprocess.run(command, capture_output=True, text=True)
    print("Command:", ' '.join(command))
    print("stdout:", result.stdout)
    print("stderr:", result.stderr)

#logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
#logger = logging.getLogger(__name__)

async def _generate_stability_video(image_path, output_path, poll_interval=10):
    """Request an image-to-video generation from Stability AI and save the result.

    Args:
        image_path: Path of the image uploaded as the generation seed frame.
        output_path: File the finished video bytes are written to.
        poll_interval: Seconds to wait between polls while the API answers 202.

    Raises:
        RuntimeError: If the generation cannot be started, no generation id is
            returned, or polling answers with an unexpected status code.
    """
    # NOTE: the requests calls are synchronous and still block the event loop
    # for the duration of each HTTP round trip.
    # The same key is used for both requests, and the upload handle is closed
    # as soon as the POST has completed.
    with open(image_path, "rb") as image_file:
        response = requests.post(
            "https://api.stability.ai/v2beta/image-to-video",
            headers={"authorization": STABILITY_API_KEY},
            files={"image": image_file},
            data={
                "seed": 5,
                "cfg_scale": 1.8,
                "motion_bucket_id": 127
            },
        )

    if response.status_code != 200:
        raise RuntimeError("Failed to initiate video generation, response: " + response.text)

    response_data = response.json()
    generation_id = response_data.get('id')
    print('this is generation_id', generation_id)

    if not generation_id:
        raise RuntimeError("No generation ID received, full response: " + str(response_data))

    # Poll until the API stops answering 202 (still rendering).
    while True:
        response = requests.get(
            f"https://api.stability.ai/v2beta/image-to-video/result/{generation_id}",
            headers={'accept': "video/*", 'authorization': STABILITY_API_KEY}
        )

        if response.status_code == 202:
            print("Generation in-progress, trying again in 10 seconds..." if poll_interval == 10
                  else f"Generation in-progress, trying again in {poll_interval} seconds...")
            # Yield to the event loop instead of freezing it with time.sleep().
            await asyncio.sleep(poll_interval)
        elif response.status_code == 200:
            print("Generation complete!")
            with open(output_path, 'wb') as file:
                file.write(response.content)
            return
        else:
            # response.text never fails, unlike response.json() on a non-JSON body.
            raise RuntimeError(response.text)


async def process_video_generation(image_input_path):
    """Build the final promotional video for one product image.

    Steps, in order:
      1. Clear the image/video/final/adjusted working folders.
      2. Open the image, convert it to PNG and resize it to 768x768 unless it
         already has one of the accepted sizes.
      3. Call ``generate_video_frames`` and turn every ``.jpg`` found in the
         image folder into a short clip, stitched into ``final_video_1.mp4``.
      4. Ask the Stability AI image-to-video API for ``final_video_2.mp4``.
      5. Normalise both videos to 24 fps / 768x768 and stitch them into
         ``final_video_3.mp4``.

    Args:
        image_input_path: Path of the source image.

    Returns:
        Path of the final stitched video, or ``None`` when the source image
        could not be opened.

    Raises:
        RuntimeError: If the Stability AI request or polling fails.
    """
    print(f"Processing video for image: {image_input_path}")
    base_path = r"G:\My Drive\Capstone\Agents"
    image_folder = os.path.join(base_path, "image")
    video_folder = os.path.join(base_path, "video")
    final_folder = os.path.join(base_path, "final")
    adjusted_folder = os.path.join(base_path, "adjusted")

    # Ensure folders exist and start from an empty state.
    for folder in [image_folder, video_folder, final_folder, adjusted_folder]:
        os.makedirs(folder, exist_ok=True)
        clear_folder(folder)
        logger.debug(f"Cleared folder: {folder}")
    print("Image and video folders have been cleared.")

    try:
        image = Image.open(image_input_path)
        print("Image opened successfully.")
    except Exception as e:
        logger.error(f"Failed to open image {image_input_path}: {e}")
        return

    if image.format != 'PNG':
        print("Converting image to PNG format.")
        image_input_path = os.path.splitext(image_input_path)[0] + '.png'
        image.save(image_input_path)

    # Any size outside this list is resized to 768x768 before upload.
    required_sizes = [(1024, 576), (576, 1024), (768, 768)]
    if image.size not in required_sizes:
        print(f"Resizing image from {image.size} to 768x768")
        image = image.resize((768, 768), Image.Resampling.LANCZOS)
        image.save(image_input_path)  # Save the resized image back to disk

    print("Generating frames and similarity scores...")

    # Called for its side effects; the returned frames/scores are not used here.
    num_seconds = 3
    generate_video_frames(image_input_path, num_seconds, image_embedding_model, image_embedding_processor, text_similarity_model)

    # Get the list of image files in the image folder
    image_files = sorted(f for f in os.listdir(image_folder) if f.endswith(".jpg"))
    print(f"Found {len(image_files)} image files in {image_folder}")

    # Create one temporary clip per image.
    temp_video_files = []
    for image_file in image_files:
        image_path = os.path.join(image_folder, image_file)
        clip_name = os.path.splitext(image_file)[0]
        temp_video_path = os.path.join(video_folder, f'temp_video_{clip_name}.mp4')

        logger.debug(f"Processing image file {image_path} for video creation.")
        print(f"Creating video from image {image_path} to {temp_video_path}")

        if not os.path.exists(image_path):
            logger.error(f"Source image file not found: {image_path}")
            print(f"Error: Source image file not found: {image_path}")
            continue  # Skip to the next file

        try:
            create_video_from_image(image_path, temp_video_path, duration=2)
            temp_video_files.append(temp_video_path)
            logger.info(f"Video created and stored at {temp_video_path}")
            print(f"Video successfully created at {temp_video_path}")
        except Exception as e:
            # Best effort: one bad frame must not abort the whole video.
            print(f"Failed to create video from {image_path}: {e}")

    # Cleared again right before outputs are written, in case an earlier step
    # left files in the output folder.
    clear_folder(final_folder)

    # Stitch the temporary videos into a final video
    final_video_path = os.path.join(final_folder, "final_video_1.mp4")
    print(f"Stitching {len(temp_video_files)} temporary videos into final video at {final_video_path}")
    first_stitch_videos(temp_video_files, final_video_path)

    # Generating video diffusion using Stability AI API
    print("Generating video diffusion using Stability AI API...")
    diffusion_video_path = os.path.join(final_folder, "final_video_2.mp4")
    await _generate_stability_video(image_input_path, diffusion_video_path)

    # Bring both videos to the same fps/resolution so they can be concatenated.
    original_video_files = [final_video_path, diffusion_video_path]
    adjusted_video_files = [
        os.path.join(adjusted_folder, "adjusted_final_video_1.mp4"),
        os.path.join(adjusted_folder, "adjusted_final_video_2.mp4"),
    ]
    for original, adjusted in zip(original_video_files, adjusted_video_files):
        adjust_video_properties(original, adjusted, target_fps=24, target_resolution=(768, 768))

    final_output_path = os.path.join(final_folder, "final_video_3.mp4")
    second_stitch_videos(adjusted_video_files, final_output_path, frame_rate=24, resolution=(768, 768))

    print("Final output video saved:", final_output_path)
    return final_output_path

# Applied at import time, before the bot is started further down.
# NOTE(review): presumably needed because the notebook environment already
# runs an event loop — confirm before removing.
nest_asyncio.apply()

import logging
# Configure root logging once (timestamp - logger name - level - message, INFO and above).
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
# Module-level logger; functions defined earlier in the file reference it by
# name, which works because the lookup only happens when they are called.
logger = logging.getLogger(__name__)

async def send_video_to_user(update: Update, context: CallbackContext, video_path):
    """Upload the video at ``video_path`` to the chat the update came from.

    When the file is missing, or the upload raises, the problem is logged and
    the user receives a short failure message instead of the video.
    """
    logger.info(f"Checking if video exists at path: {video_path}")

    if os.path.exists(video_path):
        target_chat = update.message.chat_id
        try:
            with open(video_path, 'rb') as stream:
                sent = await context.bot.send_video(target_chat, stream)
                logger.info(f"Video sent with message_id: {sent.message_id}")
        except Exception as exc:
            logger.error(f"Failed to send video: {exc!s}")
            await update.message.reply_text("Failed to send video.")
    else:
        logger.error(f"Video file does not exist: {video_path}")
        await update.message.reply_text("Failed to find the video file.")

async def start_command(update: Update, context: CallbackContext) -> None:
    """Answer the ``start`` command with the bot's greeting text."""
    greeting = 'Welcome to the video recommendation chatbot! Send an image and a textual requirement to get a video recommendation.'
    await update.message.reply_text(greeting)

async def handle_message(update: Update, context: CallbackContext) -> None:
    """Handle a non-command message: greet once, then answer product photos.

    The first message of a user gets the welcome/questionnaire text. After
    that, only messages carrying a photo are processed:

      1. Download the photo and store it as ``received_image.jpg``.
      2. Describe the product, pick the closest video category and search
         ``videos_df`` for the best matching video.
      3. Send that video together with the generated recommendation reason.
      4. Generate a new video from the photo and send it as well.

    Every failure that can be attributed to a step is reported to the user
    with a short message instead of raising.
    """
    # Imported here so the except clause below does not depend on a
    # module-level ``PIL`` name being in scope.
    from PIL import UnidentifiedImageError

    message = update.message
    if message is None:
        # The handler is registered with filters.ALL, which can also deliver
        # updates (e.g. edited messages or channel posts) that carry no
        # ``message``.
        return

    print(message.text)
    welcome_message = '''
    Welcome to the video recommendation chatbot! Please send your product image
    Also, please provide the following information:
    1. Target Audience: 
    2. Video Purpose: 
    3. Video Style: 
    4. Desired Tone: 
    5. Key Message: 
    6. Unique Selling Proposition: 
    7. Call-to-Action: 
    8. Additional Requirements: 
    '''

    if not context.user_data.get('welcome_message_sent'):
        await message.reply_text(welcome_message)
        context.user_data['welcome_message_sent'] = True
        return

    if not message.photo:
        return

    # The last entry of ``photo`` is the one the original code downloads.
    file_id = message.photo[-1].file_id
    file = await context.bot.get_file(file_id)
    file_bytes = await file.download_as_bytearray()

    # Debug: Print the size of the downloaded bytes to verify data completeness
    print(f"Downloaded image size: {len(file_bytes)} bytes")

    # Without a decoded image nothing below can work, so stop here on failure
    # instead of running into a NameError on ``image``.
    try:
        image = Image.open(BytesIO(file_bytes))
    except UnidentifiedImageError:
        print("Failed to identify image file. It may be corrupted or in an unsupported format.")
        await message.reply_text("Could not read the image you sent. Please try sending it again.")
        return
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        await message.reply_text("Could not read the image you sent. Please try sending it again.")
        return

    if message.caption:
        print("Caption:", message.caption)

    # Display the received image
    print("Received image:")
    display(image)
    image.save("received_image.jpg")

    query_text = message.caption or ""

    # Print the received text input
    print("Received text input:")
    print(query_text)

    # Send "Analyzing..." message
    await message.reply_text("Analyzing...")

    # Extract product description from the image
    product_description = extract_product_from_image(image)
    print("Extracted product description:")
    print(product_description)

    # Find the closest category based on the product description
    closest_category = find_closest_category(product_description, videos_df)
    print("Closest category:", closest_category)

    if closest_category is None:
        await message.reply_text("Unable to determine the closest category for the given product description.")
        return

    # The category may come back wrapped in single quotes; strip them.
    category_name = closest_category.strip("'")
    print("Extracted category name:", category_name)

    # Debug: Print the unique categories in the videos_df DataFrame
    print("Unique categories in videos_df:")
    print(videos_df['category'].unique())

    # Debug: Print the number of videos in the closest category
    category_videos_df = videos_df[videos_df['category'] == category_name]
    print("Number of videos in the closest category:", len(category_videos_df))

    # Retry finding the closest category if the number of videos is 0
    max_retries = 3
    retry_count = 0
    while len(category_videos_df) == 0 and retry_count < max_retries:
        print(f"No videos found in the closest category '{category_name}'. Retrying...")
        retry_count += 1
        closest_category = find_closest_category(product_description, videos_df)
        if closest_category is None:
            # A retry may yield no category at all; count it and try again.
            continue
        category_name = closest_category.strip("'")
        category_videos_df = videos_df[videos_df['category'] == category_name]

    if len(category_videos_df) == 0:
        await message.reply_text("Unable to find a suitable category for the given product description.")
        return

    # Debug: Print the number of videos in the closest category with duration less than 60 seconds
    filtered_videos_df = category_videos_df[category_videos_df['duration'] < 60]
    print("Number of videos in the closest category with duration less than 60 seconds:", len(filtered_videos_df))

    # Debug: Print the video IDs and titles of the filtered videos
    print("Filtered video IDs and titles:")
    for _, row in filtered_videos_df.iterrows():
        print(f"Video ID: {row['video_id']}, Title: {row['title']}")

    # Generate query embeddings
    query_text_embedding, query_image_embedding = generate_query_embeddings(image, query_text)

    # Combine the query text and image embeddings
    query_embedding = np.concatenate((query_text_embedding.flatten(), query_image_embedding.flatten()))

    # Perform video search based on the closest category
    top_video = search_videos(query_embedding, videos_df, closest_category)
    if top_video is None:
        await message.reply_text("No suitable video found for the given image and textual requirement.")
        return

    # Check the path before opening it; open(None) would raise.
    video_file = await get_video_file(top_video)
    if not video_file:
        await message.reply_text("Video file not found.")
        return

    # Send the video file
    with open(video_file, 'rb') as file:
        await message.reply_video(video=file)

    # Generate recommendation reason using LLM
    recommendation_reason = await generate_recommendation_reason(top_video, query_text)

    # The indentation inside this literal is part of the emitted text and is
    # kept exactly as before.
    recommended_video_details = f"""
                    ✨ Recommended Video Details ✨
                    ===============================
                    🎥 Video ID: {top_video['video_id']}
                    📌 Title: {top_video['title']}
                    💬 Caption: {top_video['captions']}
                    📝 Description: {top_video['description']}
                    🗓️ Publish Time: {top_video['publish_time']}
                    👀 Views: {top_video['views']}
                    👍 Likes: {top_video['likes']}
                    👎 Dislikes: {top_video['dislikes']}
                    💬 Comment Count: {top_video['comment_counts']}
                    ⏳ Duration: {top_video['duration']}
                    💭 Comments: {top_video['comments']}
                    🔠 Category: {top_video['category']}
                    🌐 COT: {top_video['COT']}
                    📜 Transcript: {top_video['transcription']}
                    🔍 Analysis: {top_video['analysis']}
                    ===============================
                    """
    print(recommended_video_details)
    logging.info(recommended_video_details)

    # Display the recommended video details in the Jupyter Notebook
    display(Markdown(recommended_video_details))

    # Send the recommendation reason in multiple messages if it's too long
    max_message_length = 4096  # Telegram's maximum message length
    if len(recommendation_reason) > max_message_length:
        for start in range(0, len(recommendation_reason), max_message_length):
            await message.reply_text(recommendation_reason[start:start + max_message_length])
    else:
        await message.reply_text(recommendation_reason)

    # Generate a new video from the received image and send whatever path the
    # pipeline reports, rather than assuming a fixed output location.
    try:
        generated_video_path = await process_video_generation('received_image.jpg')
        if generated_video_path:
            await send_video_to_user(update, context, generated_video_path)
        else:
            logger.error("Error processing video: no output path was returned")
            await message.reply_text('Error processing video.')
    except Exception as e:
        logger.error(f"Error processing video: {str(e)}")
        await message.reply_text('Error processing video.')
def main():
    """Build the Telegram application, register the handlers and start polling.

    The bot token is read from the ``TELEGRAM_BOT_TOKEN`` environment variable
    so that the credential does not live in the source file.

    Raises:
        RuntimeError: If ``TELEGRAM_BOT_TOKEN`` is unset or empty.
    """
    bot_token = os.environ.get("TELEGRAM_BOT_TOKEN")
    if not bot_token:
        raise RuntimeError(
            "TELEGRAM_BOT_TOKEN environment variable is not set; "
            "export the bot token before starting the bot."
        )

    application = Application.builder().token(bot_token).read_timeout(60).write_timeout(60).build()

    application.add_handler(CommandHandler('start', start_command))
    # Everything that is not a command goes to handle_message.
    application.add_handler(MessageHandler(filters.ALL & ~filters.COMMAND, handle_message))

    application.run_polling()

# Start the bot only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()