diff --git a/README.md b/README.md index a08c64acd..7f41cbb27 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ Welcome, candidates! This project is a custom-built chatbot application that mim - Deletion of conversations. - Assistant message regeneration. - User message editing. -- Model selection: currently GPT-3.5 or GPT-4. +- Model selection: currently GPT-3.5, GPT-4, or Gemini. - Custom Django admin page for managing conversations, versions, and messages. ## Images diff --git a/backend/authentication/migrations/0002_role_customuser_role.py b/backend/authentication/migrations/0002_role_customuser_role.py new file mode 100644 index 000000000..89fdd99ba --- /dev/null +++ b/backend/authentication/migrations/0002_role_customuser_role.py @@ -0,0 +1,26 @@ +# Generated by Django 5.2.4 on 2025-08-01 07:06 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('authentication', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='Role', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=255)), + ], + ), + migrations.AddField( + model_name='customuser', + name='role', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='authentication.role'), + ), + ] diff --git a/backend/authentication/models.py b/backend/authentication/models.py index 4a565e6cd..f4564d484 100644 --- a/backend/authentication/models.py +++ b/backend/authentication/models.py @@ -1,6 +1,12 @@ from django.contrib.auth.models import AbstractBaseUser, BaseUserManager, PermissionsMixin from django.db import models +class Role(models.Model): + name = models.CharField(max_length=255) + + def __str__(self): + return self.name + class CustomUserManager(BaseUserManager): def create_user(self, email, password, **extra_fields): @@ -31,6 +37,8 @@ def 
create_superuser(self, email, password, **extra_fields): class CustomUser(AbstractBaseUser, PermissionsMixin): email = models.EmailField(unique=True) + role = models.ForeignKey(Role, null=True, blank=True, on_delete=models.SET_NULL) + is_active = models.BooleanField(default=False) is_staff = models.BooleanField(default=False) diff --git a/backend/authentication/views.py b/backend/authentication/views.py index 068100805..dbe97bda1 100644 --- a/backend/authentication/views.py +++ b/backend/authentication/views.py @@ -68,10 +68,11 @@ def register_view(request): if CustomUser.objects.filter(email=email).exists(): return JsonResponse({"error": "Email is already taken"}, status=status.HTTP_400_BAD_REQUEST) - CustomUser.objects.create_user(email, password=password) + user = CustomUser.objects.create_user(email, password=password) + user.is_active = True # Set is_active to True + user.save() return JsonResponse({"data": "User created successfully"}, status=status.HTTP_201_CREATED) - @api_view(["GET"]) def verify_session(request): session_cookie = request.COOKIES.get("sessionid") diff --git a/backend/backend/__init__.py b/backend/backend/__init__.py index e69de29bb..2c87c1b8f 100644 --- a/backend/backend/__init__.py +++ b/backend/backend/__init__.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import, unicode_literals +# Ensures compatibility between Python 2 and 3 for import behavior and string literals + +from .celery import app as celery_app +# Imports the Celery app instance from celery.py and exposes it as celery_app + +__all__ = ('celery_app',) +# Defines the public API of this module to include celery_app diff --git a/backend/backend/celery.py b/backend/backend/celery.py new file mode 100644 index 000000000..ef1e7ba79 --- /dev/null +++ b/backend/backend/celery.py @@ -0,0 +1,14 @@ +import os +from celery import Celery + +# Set default Django settings module for the 'celery' program. 
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'backend.settings') + +# Create a new Celery app instance named 'backend' +app = Celery('backend') + +# Load task modules from Django settings, using the CELERY namespace for config keys +app.config_from_object('django.conf:settings', namespace='CELERY') + +# Automatically discover tasks.py in installed apps +app.autodiscover_tasks() diff --git a/backend/backend/settings.py b/backend/backend/settings.py index 9de4f024a..7e718ef46 100644 --- a/backend/backend/settings.py +++ b/backend/backend/settings.py @@ -1,31 +1,17 @@ -""" -Django settings for backend project. - -Generated by 'django-admin startproject' using Django 4.2.5. - -For more information on this file, see -https://docs.djangoproject.com/en/4.2/topics/settings/ - -For the full list of settings and their values, see -https://docs.djangoproject.com/en/4.2/ref/settings/ -""" - import os from pathlib import Path - from dotenv import load_dotenv -load_dotenv() +# Load .env file and override existing environment variables if any +load_dotenv(override=True) # Build paths inside the project like this: BASE_DIR / 'subdir'. BASE_DIR = Path(__file__).resolve().parent.parent -# Quick-start development settings - unsuitable for production -# See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/ - # SECURITY WARNING: keep the secret key used in production secret! SECRET_KEY = os.environ["DJANGO_SECRET_KEY"] FRONTEND_URL = os.environ["FRONTEND_URL"] +GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") # SECURITY WARNING: don't run with debug turned on in production! 
DEBUG = True @@ -33,7 +19,6 @@ ALLOWED_HOSTS = [] # Application definition - INSTALLED_APPS = [ "django.contrib.admin", "django.contrib.auth", @@ -47,8 +32,25 @@ "authentication", "chat", "gpt", + "django_celery_beat", ] +# Celery Settings +CELERY_BROKER_URL = 'redis://localhost:6379/0' +CELERY_TASK_SERIALIZER = 'json' + +# Database configuration - PostgreSQL +DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.postgresql', + 'NAME': 'fullstackdb', + 'USER': 'postgres', + 'PASSWORD': 'students', + 'HOST': 'localhost', + 'PORT': '5432', + } +} + MIDDLEWARE = [ "django.middleware.security.SecurityMiddleware", "django.contrib.sessions.middleware.SessionMiddleware", @@ -80,20 +82,7 @@ WSGI_APPLICATION = "backend.wsgi.application" - -# Database -# https://docs.djangoproject.com/en/4.2/ref/settings/#databases - -DATABASES = { - "default": { - "ENGINE": "django.db.backends.sqlite3", - "NAME": BASE_DIR / "db.sqlite3", - } -} - # Password validation -# https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators - AUTH_PASSWORD_VALIDATORS = [ { "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", @@ -117,26 +106,19 @@ ] # Internationalization -# https://docs.djangoproject.com/en/4.2/topics/i18n/ - LANGUAGE_CODE = "en-us" - TIME_ZONE = "Europe/Warsaw" USE_TZ = True - USE_I18N = True # Static files (CSS, JavaScript, Images) -# https://docs.djangoproject.com/en/4.2/howto/static-files/ - STATIC_ROOT = BASE_DIR / "static" STATIC_URL = "/static/" # Default primary key field type -# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field - DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" +# CORS settings CORS_ALLOWED_ORIGINS = [ FRONTEND_URL, ] @@ -146,6 +128,46 @@ FRONTEND_URL, ] +# Security cookies SESSION_COOKIE_SECURE = True CSRF_COOKIE_SECURE = True CSRF_COOKIE_SAMESITE = "None" + +# DRF (Django REST Framework) settings +REST_FRAMEWORK = { + 'DEFAULT_AUTHENTICATION_CLASSES': [ + 
'rest_framework.authentication.SessionAuthentication', + ], + 'DEFAULT_PERMISSION_CLASSES': [ + 'rest_framework.permissions.AllowAny', + ], +} + +# Configure Logging +LOGGING = { + 'version': 1, + 'disable_existing_loggers': False, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + }, + 'file': { + 'class': 'logging.FileHandler', + 'filename': 'logs.log', + }, + }, + 'loggers': { + 'django': { + 'handlers': ['console', 'file'], + 'level': 'INFO', + }, + }, +} + +# Configuring caching +CACHES = { + 'default': { + 'BACKEND': 'django.core.cache.backends.redis.RedisCache', + 'LOCATION': 'redis://localhost:6379/1', + } +} \ No newline at end of file diff --git a/backend/backend/urls.py b/backend/backend/urls.py index fa154c7fb..9648e680b 100644 --- a/backend/backend/urls.py +++ b/backend/backend/urls.py @@ -5,16 +5,18 @@ from django.urls import include, path from rest_framework.decorators import api_view - +# Simple root endpoint to verify the app is running @api_view(["GET"]) def root_view(request): return JsonResponse({"message": "App works!"}) - urlpatterns = [ - path("admin/", admin.site.urls), - path("chat/", include("chat.urls")), - path("gpt/", include("gpt.urls")), - path("auth/", include("authentication.urls")), - path("", root_view), -] + static(settings.STATIC_URL, document_root=settings.STATIC_ROOT) + path("admin/", admin.site.urls), # Admin site + path("chat/", include("chat.urls")), # Chat app URLs + path("gpt/", include("gpt.urls")), # GPT app URLs + path("auth/", include("authentication.urls")), # Authentication URLs + path("", root_view), # Root endpoint +] + +# Serve static files during development +urlpatterns += static(settings.STATIC_URL, document_root=settings.STATIC_ROOT) diff --git a/backend/basic_thermo.pdf b/backend/basic_thermo.pdf new file mode 100644 index 000000000..2bd225185 Binary files /dev/null and b/backend/basic_thermo.pdf differ diff --git a/backend/chat/admin.py b/backend/chat/admin.py index a4e7d15fc..9eae2cfd9 
100644 --- a/backend/chat/admin.py +++ b/backend/chat/admin.py @@ -1,32 +1,31 @@ from django.contrib import admin from django.utils import timezone from nested_admin.nested import NestedModelAdmin, NestedStackedInline, NestedTabularInline - from chat.models import Conversation, Message, Role, Version class RoleAdmin(NestedModelAdmin): - list_display = ["id", "name"] + list_display = ["id", "name"] # Show role ID and name class MessageAdmin(NestedModelAdmin): - list_display = ["display_desc", "role", "id", "created_at", "version"] + list_display = ["display_desc", "role", "id", "created_at", "version"] # Display key fields def display_desc(self, obj): - return obj.content[:20] + "..." + return obj.content[:20] + "..." # Short preview of message content display_desc.short_description = "content" class MessageInline(NestedTabularInline): model = Message - extra = 2 # number of extra forms to display + extra = 2 # Show 2 empty forms by default class VersionInline(NestedStackedInline): model = Version extra = 1 - inlines = [MessageInline] + inlines = [MessageInline] # Nested messages inside versions class DeletedListFilter(admin.SimpleListFilter): @@ -34,48 +33,54 @@ class DeletedListFilter(admin.SimpleListFilter): parameter_name = "deleted" def lookups(self, request, model_admin): - return ( - ("True", "Yes"), - ("False", "No"), - ) + return (("True", "Yes"), ("False", "No")) def queryset(self, request, queryset): value = self.value() if value == "True": - return queryset.filter(deleted_at__isnull=False) + return queryset.filter(deleted_at__isnull=False) # Show deleted elif value == "False": - return queryset.filter(deleted_at__isnull=True) + return queryset.filter(deleted_at__isnull=True) # Show not deleted return queryset class ConversationAdmin(NestedModelAdmin): actions = ["undelete_selected", "soft_delete_selected"] inlines = [VersionInline] - list_display = ("title", "id", "created_at", "modified_at", "deleted_at", "version_count", "is_deleted", "user") + 
list_display = ( + "title", + "id", + "created_at", + "modified_at", + "deleted_at", + "version_count", + "is_deleted", + "user", + "summary", # Show summary field + ) list_filter = (DeletedListFilter,) ordering = ("-modified_at",) + readonly_fields = ("summary", "created_at", "modified_at", "deleted_at") # Prevent editing these def undelete_selected(self, request, queryset): - queryset.update(deleted_at=None) + queryset.update(deleted_at=None) # Undo soft-delete undelete_selected.short_description = "Undelete selected conversations" def soft_delete_selected(self, request, queryset): - queryset.update(deleted_at=timezone.now()) + queryset.update(deleted_at=timezone.now()) # Soft-delete by timestamp soft_delete_selected.short_description = "Soft delete selected conversations" def get_action_choices(self, request, **kwargs): choices = super().get_action_choices(request) for idx, choice in enumerate(choices): - fn_name = choice[0] - if fn_name == "delete_selected": - new_choice = (fn_name, "Hard delete selected conversations") - choices[idx] = new_choice + if choice[0] == "delete_selected": + choices[idx] = (choice[0], "Hard delete selected conversations") # Rename action return choices def is_deleted(self, obj): - return obj.deleted_at is not None + return obj.deleted_at is not None # Boolean flag for deleted is_deleted.boolean = True is_deleted.short_description = "Deleted?" 
@@ -83,9 +88,10 @@ def is_deleted(self, obj): class VersionAdmin(NestedModelAdmin): inlines = [MessageInline] - list_display = ("id", "conversation", "parent_version", "root_message") + list_display = ("id", "conversation", "parent_version", "root_message") # Key fields +# Register models with admin site admin.site.register(Role, RoleAdmin) admin.site.register(Message, MessageAdmin) admin.site.register(Conversation, ConversationAdmin) diff --git a/backend/chat/management/commands/cleanup_conversations.py b/backend/chat/management/commands/cleanup_conversations.py new file mode 100644 index 000000000..01d895c22 --- /dev/null +++ b/backend/chat/management/commands/cleanup_conversations.py @@ -0,0 +1,14 @@ +# chat/management/commands/cleanup_conversations.py + +from django.core.management.base import BaseCommand +from chat.models import Conversation +from datetime import timedelta +from django.utils import timezone + +class Command(BaseCommand): + help = 'Deletes conversations older than 30 days' # Command description + + def handle(self, *args, **kwargs): + threshold = timezone.now() - timedelta(days=30) # Cutoff date + deleted, _ = Conversation.objects.filter(created_at__lt=threshold).delete() # Delete old conversations + self.stdout.write(self.style.SUCCESS(f"Deleted {deleted} old conversations")) # Print result diff --git a/backend/chat/management/commands/create_roles.py b/backend/chat/management/commands/create_roles.py index 43597cadc..e41849345 100644 --- a/backend/chat/management/commands/create_roles.py +++ b/backend/chat/management/commands/create_roles.py @@ -1,10 +1,8 @@ from django.core.management.base import BaseCommand - from chat.models import Role - class Command(BaseCommand): def handle(self, *args, **options): - Role.objects.get_or_create(name="user") - Role.objects.get_or_create(name="assistant") - self.stdout.write(self.style.SUCCESS("Successfully created roles")) + Role.objects.get_or_create(name="user") # Create 'user' role if not exists + 
Role.objects.get_or_create(name="assistant") # Create 'assistant' role if not exists + self.stdout.write(self.style.SUCCESS("Successfully created roles")) # Success message diff --git a/backend/chat/migrations/0002_conversation_summary.py b/backend/chat/migrations/0002_conversation_summary.py new file mode 100644 index 000000000..26a0db7ea --- /dev/null +++ b/backend/chat/migrations/0002_conversation_summary.py @@ -0,0 +1,18 @@ +# Generated by Django 5.2.4 on 2025-07-28 06:26 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('chat', '0001_initial'), + ] + + operations = [ + migrations.AddField( + model_name='conversation', + name='summary', + field=models.TextField(blank=True, null=True), + ), + ] diff --git a/backend/chat/migrations/0003_remove_conversation_active_version_and_more.py b/backend/chat/migrations/0003_remove_conversation_active_version_and_more.py new file mode 100644 index 000000000..7d6a6388d --- /dev/null +++ b/backend/chat/migrations/0003_remove_conversation_active_version_and_more.py @@ -0,0 +1,30 @@ +# Generated by Django 5.2.4 on 2025-07-28 07:21 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('chat', '0002_conversation_summary'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.RemoveField( + model_name='conversation', + name='active_version', + ), + migrations.AlterField( + model_name='conversation', + name='title', + field=models.CharField(blank=True, max_length=255, null=True), + ), + migrations.AlterField( + model_name='conversation', + name='user', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='conversations', to=settings.AUTH_USER_MODEL), + ), + ] diff --git a/backend/chat/migrations/0004_conversation_active_version_alter_conversation_title_and_more.py 
b/backend/chat/migrations/0004_conversation_active_version_alter_conversation_title_and_more.py new file mode 100644 index 000000000..ed4b47132 --- /dev/null +++ b/backend/chat/migrations/0004_conversation_active_version_alter_conversation_title_and_more.py @@ -0,0 +1,31 @@ +# Generated by Django 5.2.4 on 2025-07-28 09:03 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('chat', '0003_remove_conversation_active_version_and_more'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.AddField( + model_name='conversation', + name='active_version', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='current_version_conversations', to='chat.version'), + ), + migrations.AlterField( + model_name='conversation', + name='title', + field=models.CharField(default='Mock title', max_length=100), + ), + migrations.AlterField( + model_name='conversation', + name='user', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL), + ), + ] diff --git a/backend/chat/migrations/0005_uploadedfile.py b/backend/chat/migrations/0005_uploadedfile.py new file mode 100644 index 000000000..1ffda693d --- /dev/null +++ b/backend/chat/migrations/0005_uploadedfile.py @@ -0,0 +1,27 @@ +# Generated by Django 5.2.4 on 2025-07-30 06:06 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('chat', '0004_conversation_active_version_alter_conversation_title_and_more'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name='UploadedFile', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 
+ ('file', models.FileField(upload_to='uploads/')), + ('uploaded_at', models.DateTimeField(auto_now_add=True)), + ('filename', models.CharField(max_length=255)), + ('checksum', models.CharField(max_length=255)), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + ), + ] diff --git a/backend/chat/migrations/0006_uploadedfile_size_alter_uploadedfile_checksum.py b/backend/chat/migrations/0006_uploadedfile_size_alter_uploadedfile_checksum.py new file mode 100644 index 000000000..a34a64e86 --- /dev/null +++ b/backend/chat/migrations/0006_uploadedfile_size_alter_uploadedfile_checksum.py @@ -0,0 +1,23 @@ +# Generated by Django 5.2.4 on 2025-07-30 06:15 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('chat', '0005_uploadedfile'), + ] + + operations = [ + migrations.AddField( + model_name='uploadedfile', + name='size', + field=models.IntegerField(blank=True, null=True), + ), + migrations.AlterField( + model_name='uploadedfile', + name='checksum', + field=models.CharField(max_length=255, unique=True), + ), + ] diff --git a/backend/chat/models.py b/backend/chat/models.py index 242788f14..9ca719048 100644 --- a/backend/chat/models.py +++ b/backend/chat/models.py @@ -1,65 +1,129 @@ import uuid - from django.db import models - from authentication.models import CustomUser +from chat.utils.summarizer import generate_summary # Your own T5-based summarizer class Role(models.Model): - name = models.CharField(max_length=20, blank=False, null=False, default="user") + """ + Represents the role of a message sender, e.g., "user", "assistant". + """ + name = models.CharField(max_length=20, default="user") def __str__(self): return self.name class Conversation(models.Model): + """ + Stores chat conversations tied to a user. + Supports soft deletion with deleted_at and keeps track of an active version. + Also stores a summary of the conversation content. 
+ """ id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) - title = models.CharField(max_length=100, blank=False, null=False, default="Mock title") + title = models.CharField(max_length=100, default="Mock title") created_at = models.DateTimeField(auto_now_add=True) modified_at = models.DateTimeField(auto_now=True) active_version = models.ForeignKey( - "Version", null=True, blank=True, on_delete=models.CASCADE, related_name="current_version_conversations" + "Version", + null=True, + blank=True, + on_delete=models.CASCADE, + related_name="current_version_conversations" ) - deleted_at = models.DateTimeField(null=True, blank=True) + deleted_at = models.DateTimeField(null=True, blank=True) # Soft delete timestamp user = models.ForeignKey(CustomUser, on_delete=models.CASCADE) + summary = models.TextField(blank=True, null=True) # Summary generated from messages def __str__(self): return self.title def version_count(self): + """ + Returns the total number of versions under this conversation. + Useful for admin or UI display. + """ return self.versions.count() - version_count.short_description = "Number of versions" + def update_summary(self): + """ + Generate and update the summary field by concatenating all messages content + in this conversation and passing it to your summarizer utility. + Called automatically after a message is saved. + """ + from .models import Message # Avoid circular import + + messages = Message.objects.filter(version__conversation=self).order_by("created_at") + combined_text = " ".join(msg.content for msg in messages) + if combined_text.strip(): + self.summary = generate_summary(combined_text) + self.save() + class Version(models.Model): + """ + Represents a specific version (or branch) of a conversation. + Versions can be branched from parent versions and link to a root message. 
+ """ id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) conversation = models.ForeignKey("Conversation", related_name="versions", on_delete=models.CASCADE) parent_version = models.ForeignKey("self", null=True, blank=True, on_delete=models.SET_NULL) root_message = models.ForeignKey( - "Message", null=True, blank=True, on_delete=models.SET_NULL, related_name="root_message_versions" + "Message", + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="root_message_versions" ) def __str__(self): - if self.root_message: - return f"Version of `{self.conversation.title}` created at `{self.root_message.created_at}`" - else: - return f"Version of `{self.conversation.title}` with no root message yet" + return f"Version of `{self.conversation.title}`" class Message(models.Model): + """ + Individual chat messages belonging to a specific version. + Each message has a role (e.g., user or assistant). + """ id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) - content = models.TextField(blank=False, null=False) + content = models.TextField() role = models.ForeignKey(Role, on_delete=models.CASCADE) created_at = models.DateTimeField(auto_now_add=True) version = models.ForeignKey("Version", related_name="messages", on_delete=models.CASCADE) class Meta: - ordering = ["created_at"] + ordering = ["created_at"] # Ensures messages are retrieved in creation order + + def save(self, *args, **kwargs): + """ + Override save to automatically update the conversation summary + whenever a new message is saved. + """ + super().save(*args, **kwargs) + self.version.conversation.update_summary() + + def __str__(self): + return f"{self.role}: {self.content[:20]}..." # Display role and snippet of content + + +class UploadedFile(models.Model): + """ + Tracks files uploaded by users. + Stores file metadata like filename, checksum, size, and upload timestamp. 
+ """ + user = models.ForeignKey(CustomUser, on_delete=models.CASCADE) + file = models.FileField(upload_to='uploads/') + uploaded_at = models.DateTimeField(auto_now_add=True) + filename = models.CharField(max_length=255) + checksum = models.CharField(max_length=255, unique=True) # For detecting duplicates + size = models.IntegerField(null=True, blank=True) # File size in bytes def save(self, *args, **kwargs): - self.version.conversation.save() + # Automatically sets size if not already set + if not self.size and self.file: + self.size = self.file.size super().save(*args, **kwargs) def __str__(self): - return f"{self.role}: {self.content[:20]}..." + return self.filename diff --git a/backend/chat/serializers.py b/backend/chat/serializers.py index 0c721c061..47477f876 100644 --- a/backend/chat/serializers.py +++ b/backend/chat/serializers.py @@ -3,75 +3,113 @@ from rest_framework import serializers from chat.models import Conversation, Message, Role, Version +from .models import Conversation, UploadedFile def should_serialize(validated_data, field_name) -> bool: + """ + Helper function to check if a field should be serialized, + i.e., if the field exists and is not None in validated_data. + """ if validated_data.get(field_name) is not None: return True class TitleSerializer(serializers.Serializer): + """ + Simple serializer for validating/updating a title string. + Used when only a title update is needed. + """ title = serializers.CharField(max_length=100, required=True) class VersionTimeIdSerializer(serializers.Serializer): + """ + Serializer that exposes only version id and created_at datetime. + Useful for listing versions in minimal form. + """ id = serializers.UUIDField() created_at = serializers.DateTimeField() class MessageSerializer(serializers.ModelSerializer): + """ + Serializer for Message model. + Role is represented by its 'name' string instead of primary key. 
+ """ role = serializers.SlugRelatedField(slug_field="name", queryset=Role.objects.all()) class Meta: model = Message fields = [ - "id", # DB - "content", - "role", # required - "created_at", # DB, read-only + "id", # Primary key + "content", # Message text content + "role", # Role name (e.g. user, assistant) + "created_at", # Timestamp, read-only ] - read_only_fields = ["id", "created_at", "version"] + read_only_fields = ["id", "created_at", "version"] # version is set automatically def create(self, validated_data): + """ + Create a new Message instance. + """ message = Message.objects.create(**validated_data) return message def to_representation(self, instance): + """ + Customize output representation. + Adds an empty "versions" list to each message (potentially for frontend use). + """ representation = super().to_representation(instance) - representation["versions"] = [] # add versions field + representation["versions"] = [] # placeholder for versions data if needed return representation class VersionSerializer(serializers.ModelSerializer): - messages = MessageSerializer(many=True) - active = serializers.SerializerMethodField() - conversation_id = serializers.UUIDField(source="conversation.id") - created_at = serializers.SerializerMethodField() + """ + Serializer for Version model, including nested messages. + Adds 'active' boolean field indicating if this version is the active version in the conversation. 
+ """ + messages = MessageSerializer(many=True) # nested serializer for messages + active = serializers.SerializerMethodField() # computed field + conversation_id = serializers.UUIDField(source="conversation.id") # expose conversation id + created_at = serializers.SerializerMethodField() # custom created_at field logic class Meta: model = Version fields = [ "id", - "conversation_id", # DB + "conversation_id", # UUID of the parent conversation "root_message", - "messages", - "active", - "created_at", # DB, read-only - "parent_version", # optional + "messages", # nested messages data + "active", # is this version the active one? + "created_at", # read-only, custom computed field + "parent_version", # optional, allows branching ] read_only_fields = ["id", "conversation"] @staticmethod def get_active(obj): + """ + Returns True if this version is the active version for the conversation. + """ return obj == obj.conversation.active_version @staticmethod def get_created_at(obj): + """ + Returns the timestamp for the version creation. + Uses root_message's creation time if set, else falls back to conversation creation time. + """ if obj.root_message is None: return timezone.localtime(obj.conversation.created_at) return timezone.localtime(obj.root_message.created_at) def create(self, validated_data): + """ + Creates a Version along with nested messages. + """ messages_data = validated_data.pop("messages") version = Version.objects.create(**validated_data) for message_data in messages_data: @@ -80,9 +118,15 @@ def create(self, validated_data): return version def update(self, instance, validated_data): + """ + Updates Version fields and nested messages. + Requires at least one of 'conversation', 'parent_version', or 'root_message' fields to be provided. 
+ """ instance.conversation = validated_data.get("conversation", instance.conversation) instance.parent_version = validated_data.get("parent_version", instance.parent_version) instance.root_message = validated_data.get("root_message", instance.root_message) + + # Validate that at least one required field is provided if not any( [ should_serialize(validated_data, "conversation"), @@ -95,33 +139,42 @@ def update(self, instance, validated_data): ) instance.save() + # Update or create nested messages messages_data = validated_data.pop("messages", []) for message_data in messages_data: if "id" in message_data: + # Update existing message message = Message.objects.get(id=message_data["id"], version=instance) message.content = message_data.get("content", message.content) message.role = message_data.get("role", message.role) message.save() else: + # Create new message Message.objects.create(version=instance, **message_data) return instance class ConversationSerializer(serializers.ModelSerializer): + """ + Serializer for Conversation model including nested versions. + """ versions = VersionSerializer(many=True) class Meta: model = Conversation fields = [ - "id", # DB - "title", # required - "active_version", - "versions", # optional - "modified_at", # DB, read-only + "id", # UUID primary key + "title", # Conversation title + "active_version", # ForeignKey to active version (UUID) + "versions", # Nested list of versions + "modified_at", # Timestamp, read-only ] def create(self, validated_data): + """ + Creates a Conversation and nested versions. + """ versions_data = validated_data.pop("versions", []) conversation = Conversation.objects.create(**validated_data) for version_data in versions_data: @@ -132,6 +185,10 @@ def create(self, validated_data): return conversation def update(self, instance, validated_data): + """ + Updates Conversation fields and nested versions. + Also updates active_version by its UUID. 
+ """ instance.title = validated_data.get("title", instance.title) active_version_id = validated_data.get("active_version", instance.active_version) if active_version_id is not None: @@ -139,6 +196,7 @@ def update(self, instance, validated_data): instance.active_version = active_version instance.save() + # Update or create nested versions versions_data = validated_data.pop("versions", []) for version_data in versions_data: if "id" in version_data: @@ -150,3 +208,23 @@ def update(self, instance, validated_data): version_serializer.save(conversation=instance) return instance + + +class ConversationSummarySerializer(serializers.ModelSerializer): + """ + Serializer for returning conversation summaries. + Only returns id, title, and summary text. + """ + class Meta: + model = Conversation + fields = ['id', 'title', 'summary'] + + +class UploadedFileSerializer(serializers.ModelSerializer): + """ + Serializer for uploaded files. + Returns metadata like filename, checksum, and upload timestamp. 
+ """ + class Meta: + model = UploadedFile + fields = ['id', 'file', 'uploaded_at', 'filename', 'checksum'] diff --git a/backend/chat/tasks.py b/backend/chat/tasks.py new file mode 100644 index 000000000..22767708f --- /dev/null +++ b/backend/chat/tasks.py @@ -0,0 +1,15 @@ +from celery import shared_task +from django.utils import timezone +from datetime import timedelta +from chat.models import Conversation + +@shared_task +def cleanup_old_conversations(): + # Set cutoff date 30 days ago + threshold_date = timezone.now() - timedelta(days=30) + + # Delete conversations older than cutoff + deleted_count, _ = Conversation.objects.filter(created_at__lt=threshold_date).delete() + + # Return number of deleted conversations + return f"Deleted {deleted_count} old conversations" diff --git a/backend/chat/urls.py b/backend/chat/urls.py index bd8ceadc0..7f1a364fe 100644 --- a/backend/chat/urls.py +++ b/backend/chat/urls.py @@ -1,9 +1,20 @@ from django.urls import path - from chat import views +from chat.views import ( + upload_file, + list_uploaded_files, + delete_uploaded_file, +) urlpatterns = [ + # ------------------------------- + # Root / Basic Health Check + # ------------------------------- path("", views.chat_root_view, name="chat_root_view"), + + # ------------------------------- + # Conversations + # ------------------------------- path("conversations/", views.get_conversations, name="get_conversations"), path("conversations_branched/", views.get_conversations_branched, name="get_branched_conversations"), path("conversation_branched//", views.get_conversation_branched, name="get_branched_conversation"), @@ -18,5 +29,17 @@ name="conversation_switch_version", ), path("conversations//delete/", views.conversation_soft_delete, name="conversation_delete"), + path("conversations/summaries/", views.get_conversation_summaries, name="conversation-summaries"), + + # ------------------------------- + # Versions + # ------------------------------- path("versions//add_message/", 
views.version_add_message, name="version_add_message"), + + # ------------------------------- + # File Upload and Management + # ------------------------------- + path("files/upload/", upload_file, name="upload-file"), + path("files/", list_uploaded_files, name="list-uploaded-files"), + path("files//delete/", delete_uploaded_file, name="delete-uploaded-file"), ] diff --git a/backend/chat/utils/summarizer.py b/backend/chat/utils/summarizer.py new file mode 100644 index 000000000..6d67f9305 --- /dev/null +++ b/backend/chat/utils/summarizer.py @@ -0,0 +1,15 @@ +from transformers import T5Tokenizer, T5ForConditionalGeneration +import torch + +# Load the model only once +tokenizer = T5Tokenizer.from_pretrained("t5-small") +model = T5ForConditionalGeneration.from_pretrained("t5-small") + +def generate_summary(text, max_length=100): + if not text.strip(): + return "No content available." + input_text = "summarize: " + text + inputs = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True) + summary_ids = model.generate(inputs, max_length=max_length, num_beams=2, early_stopping=True) + summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True) + return summary diff --git a/backend/chat/views.py b/backend/chat/views.py index 0d18f7a69..bca7995ba 100644 --- a/backend/chat/views.py +++ b/backend/chat/views.py @@ -1,232 +1,304 @@ -from django.contrib.auth.decorators import login_required -from django.utils import timezone -from rest_framework import status -from rest_framework.decorators import api_view +from rest_framework.permissions import IsAuthenticated +from rest_framework.parsers import MultiPartParser, FormParser +from rest_framework.decorators import api_view, permission_classes, parser_classes from rest_framework.response import Response - -from chat.models import Conversation, Message, Version -from chat.serializers import ConversationSerializer, MessageSerializer, TitleSerializer, VersionSerializer +from 
rest_framework.pagination import PageNumberPagination +from .models import UploadedFile, Conversation, Message, Version +from .serializers import ( + UploadedFileSerializer, ConversationSerializer, MessageSerializer, + TitleSerializer, VersionSerializer, ConversationSummarySerializer +) from chat.utils.branching import make_branched_conversation +import hashlib +from django.utils import timezone - +# ------------------------------- +# Basic Health Check +# ------------------------------- @api_view(["GET"]) def chat_root_view(request): - return Response({"message": "Chat works!"}, status=status.HTTP_200_OK) + return Response({"message": "Chat works!"}) - -@login_required +# ------------------------------- +# Conversation Endpoints +# ------------------------------- @api_view(["GET"]) +@permission_classes([IsAuthenticated]) def get_conversations(request): - conversations = Conversation.objects.filter(user=request.user, deleted_at__isnull=True).order_by("-modified_at") + """List all active conversations for the authenticated user.""" + conversations = Conversation.objects.filter( + user=request.user, deleted_at__isnull=True + ).order_by("-modified_at") serializer = ConversationSerializer(conversations, many=True) - return Response(serializer.data, status=status.HTTP_200_OK) - + return Response(serializer.data) -@login_required @api_view(["GET"]) +@permission_classes([IsAuthenticated]) def get_conversations_branched(request): - conversations = Conversation.objects.filter(user=request.user, deleted_at__isnull=True).order_by("-modified_at") - conversations_serializer = ConversationSerializer(conversations, many=True) - conversations_data = conversations_serializer.data + """Return conversations in branched structure.""" + conversations = Conversation.objects.filter( + user=request.user, deleted_at__isnull=True + ).order_by("-modified_at") - for conversation_data in conversations_data: - make_branched_conversation(conversation_data) + data = 
ConversationSerializer(conversations, many=True).data + for convo in data: + make_branched_conversation(convo) - return Response(conversations_data, status=status.HTTP_200_OK) + return Response(data) - -@login_required @api_view(["GET"]) +@permission_classes([IsAuthenticated]) def get_conversation_branched(request, pk): + """Return a single conversation in branched format.""" try: conversation = Conversation.objects.get(user=request.user, pk=pk) except Conversation.DoesNotExist: - return Response({"detail": "Conversation not found"}, status=status.HTTP_404_NOT_FOUND) - - conversation_serializer = ConversationSerializer(conversation) - conversation_data = conversation_serializer.data - make_branched_conversation(conversation_data) + return Response({"detail": "Conversation not found"}, status=404) - return Response(conversation_data, status=status.HTTP_200_OK) + data = ConversationSerializer(conversation).data + make_branched_conversation(data) + return Response(data) - -@login_required @api_view(["POST"]) +@permission_classes([IsAuthenticated]) def add_conversation(request): + """Create a new conversation along with its messages.""" try: - conversation_data = {"title": request.data.get("title", "Mock title"), "user": request.user} - conversation = Conversation.objects.create(**conversation_data) + conversation = Conversation.objects.create( + title=request.data.get("title", "Mock title"), + user=request.user + ) version = Version.objects.create(conversation=conversation) messages_data = request.data.get("messages", []) - for idx, message_data in enumerate(messages_data): - message_serializer = MessageSerializer(data=message_data) - if message_serializer.is_valid(): - message_serializer.save(version=version) + for idx, msg in enumerate(messages_data): + serializer = MessageSerializer(data=msg) + if serializer.is_valid(): + serializer.save(version=version) if idx == 0: version.save() else: - return Response(message_serializer.errors, 
status=status.HTTP_400_BAD_REQUEST) + return Response(serializer.errors, status=400) conversation.active_version = version conversation.save() - serializer = ConversationSerializer(conversation) - return Response(serializer.data, status=status.HTTP_201_CREATED) + return Response(ConversationSerializer(conversation).data, status=201) except Exception as e: - return Response({"detail": str(e)}, status=status.HTTP_400_BAD_REQUEST) + return Response({"detail": str(e)}, status=400) - -@login_required @api_view(["GET", "PUT", "DELETE"]) +@permission_classes([IsAuthenticated]) def conversation_manage(request, pk): + """Retrieve, update or hard-delete a conversation.""" try: conversation = Conversation.objects.get(user=request.user, pk=pk) except Conversation.DoesNotExist: - return Response(status=status.HTTP_404_NOT_FOUND) + return Response(status=404) if request.method == "GET": - serializer = ConversationSerializer(conversation) - return Response(serializer.data) + return Response(ConversationSerializer(conversation).data) - elif request.method == "PUT": + if request.method == "PUT": serializer = ConversationSerializer(conversation, data=request.data) if serializer.is_valid(): serializer.save() return Response(serializer.data) - return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) + return Response(serializer.errors, status=400) - elif request.method == "DELETE": + if request.method == "DELETE": conversation.delete() - return Response(status=status.HTTP_204_NO_CONTENT) - + return Response(status=204) -@login_required @api_view(["PUT"]) +@permission_classes([IsAuthenticated]) def conversation_change_title(request, pk): + """Update the title of a conversation.""" try: conversation = Conversation.objects.get(user=request.user, pk=pk) except Conversation.DoesNotExist: - return Response(status=status.HTTP_404_NOT_FOUND) + return Response(status=404) serializer = TitleSerializer(data=request.data) - if serializer.is_valid(): - conversation.title = 
serializer.data.get("title") + conversation.title = serializer.validated_data["title"] conversation.save() - return Response(status=status.HTTP_204_NO_CONTENT) - - return Response({"detail": "Title not provided"}, status=status.HTTP_400_BAD_REQUEST) + return Response(status=204) + return Response({"detail": "Invalid title"}, status=400) - -@login_required @api_view(["PUT"]) +@permission_classes([IsAuthenticated]) def conversation_soft_delete(request, pk): + """Soft delete a conversation by setting deleted_at.""" try: conversation = Conversation.objects.get(user=request.user, pk=pk) except Conversation.DoesNotExist: - return Response(status=status.HTTP_404_NOT_FOUND) + return Response(status=404) conversation.deleted_at = timezone.now() conversation.save() - return Response(status=status.HTTP_204_NO_CONTENT) - + return Response(status=204) -@login_required +@api_view(["GET"]) +@permission_classes([IsAuthenticated]) +def get_conversation_summaries(request): + """ + Return paginated conversation summaries with optional filtering by title. 
+ """ + # Get conversations for logged-in user, not deleted + qs = Conversation.objects.filter(user=request.user, deleted_at__isnull=True).order_by("-modified_at") + + # Optional filtering by title query param + title = request.query_params.get("title") + if title: + qs = qs.filter(title__icontains=title) + + # Paginate the queryset + paginator = PageNumberPagination() + paginated_qs = paginator.paginate_queryset(qs, request) + serializer = ConversationSummarySerializer(paginated_qs, many=True) + return paginator.get_paginated_response(serializer.data) +# ------------------------------- +# Message & Version Endpoints +# ------------------------------- @api_view(["POST"]) +@permission_classes([IsAuthenticated]) def conversation_add_message(request, pk): + """Add a message to a conversation's active version.""" try: conversation = Conversation.objects.get(user=request.user, pk=pk) version = conversation.active_version except Conversation.DoesNotExist: - return Response(status=status.HTTP_404_NOT_FOUND) + return Response(status=404) - if version is None: - return Response({"detail": "Active version not set for this conversation."}, status=status.HTTP_400_BAD_REQUEST) + if not version: + return Response({"detail": "Active version not set."}, status=400) serializer = MessageSerializer(data=request.data) if serializer.is_valid(): serializer.save(version=version) - # return Response(serializer.data, status=status.HTTP_201_CREATED) - return Response( - { - "message": serializer.data, - "conversation_id": conversation.id, - }, - status=status.HTTP_201_CREATED, - ) - return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) + return Response({ + "message": serializer.data, + "conversation_id": conversation.id, + }, status=201) + return Response(serializer.errors, status=400) - -@login_required @api_view(["POST"]) +@permission_classes([IsAuthenticated]) def conversation_add_version(request, pk): + """Fork a conversation starting from a specific root message.""" 
try: conversation = Conversation.objects.get(user=request.user, pk=pk) version = conversation.active_version - root_message_id = request.data.get("root_message_id") - root_message = Message.objects.get(pk=root_message_id) + root_msg_id = request.data.get("root_message_id") + root_message = Message.objects.get(pk=root_msg_id) except Conversation.DoesNotExist: - return Response(status=status.HTTP_404_NOT_FOUND) + return Response(status=404) except Message.DoesNotExist: - return Response({"detail": "Root message not found"}, status=status.HTTP_404_NOT_FOUND) + return Response({"detail": "Root message not found"}, status=404) - # Check if root message belongs to the same conversation if root_message.version.conversation != conversation: - return Response({"detail": "Root message not part of the conversation"}, status=status.HTTP_400_BAD_REQUEST) + return Response({"detail": "Invalid root message."}, status=400) new_version = Version.objects.create( - conversation=conversation, parent_version=root_message.version, root_message=root_message + conversation=conversation, + parent_version=root_message.version, + root_message=root_message ) - # Copy messages before root_message to new_version - messages_before_root = Message.objects.filter(version=version, created_at__lt=root_message.created_at) - new_messages = [ - Message(content=message.content, role=message.role, version=new_version) for message in messages_before_root - ] - Message.objects.bulk_create(new_messages) + msgs = Message.objects.filter(version=version, created_at__lt=root_message.created_at) + Message.objects.bulk_create([ + Message(content=m.content, role=m.role, version=new_version) + for m in msgs + ]) - # Set the new version as the current version conversation.active_version = new_version conversation.save() - serializer = VersionSerializer(new_version) - return Response(serializer.data, status=status.HTTP_201_CREATED) - + return Response(VersionSerializer(new_version).data, status=201) -@login_required 
@api_view(["PUT"]) +@permission_classes([IsAuthenticated]) def conversation_switch_version(request, pk, version_id): + """Switch the active version of a conversation.""" try: conversation = Conversation.objects.get(pk=pk) version = Version.objects.get(pk=version_id, conversation=conversation) except Conversation.DoesNotExist: - return Response({"detail": "Conversation not found"}, status=status.HTTP_404_NOT_FOUND) + return Response({"detail": "Conversation not found"}, status=404) except Version.DoesNotExist: - return Response({"detail": "Version not found"}, status=status.HTTP_404_NOT_FOUND) + return Response({"detail": "Version not found"}, status=404) conversation.active_version = version conversation.save() + return Response(status=204) - return Response(status=status.HTTP_204_NO_CONTENT) - - -@login_required @api_view(["POST"]) +@permission_classes([IsAuthenticated]) def version_add_message(request, pk): + """Add a message to a specific version.""" try: version = Version.objects.get(pk=pk) except Version.DoesNotExist: - return Response(status=status.HTTP_404_NOT_FOUND) + return Response(status=404) serializer = MessageSerializer(data=request.data) if serializer.is_valid(): serializer.save(version=version) - return Response( - { - "message": serializer.data, - "version_id": version.id, - }, - status=status.HTTP_201_CREATED, + return Response({ + "message": serializer.data, + "version_id": version.id, + }, status=201) + return Response(serializer.errors, status=400) + +# ------------------------------- +# File Upload & Management +# ------------------------------- +@api_view(["POST"]) +@permission_classes([IsAuthenticated]) +@parser_classes([MultiPartParser, FormParser]) +def upload_file(request): + """Upload a file with duplication check via MD5.""" + file = request.FILES.get('file') + if not file: + return Response({'error': 'No file provided'}, status=400) + if file.size == 0: + return Response({'error': 'File is empty'}, status=400) + + try: + checksum = 
hashlib.md5(file.read()).hexdigest() + if UploadedFile.objects.filter(checksum=checksum, user=request.user).exists(): + return Response({'error': 'File already uploaded'}, status=400) + + file.seek(0) + uploaded = UploadedFile.objects.create( + user=request.user, + file=file, + filename=file.name, + checksum=checksum ) - return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) + return Response(UploadedFileSerializer(uploaded).data, status=201) + except Exception as e: + return Response({'error': str(e)}, status=500) + +@api_view(["GET"]) +@permission_classes([IsAuthenticated]) +def list_uploaded_files(request): + """List uploaded files for the current user.""" + files = UploadedFile.objects.filter(user=request.user) + return Response(UploadedFileSerializer(files, many=True).data) + +@api_view(["DELETE"]) +@permission_classes([IsAuthenticated]) +def delete_uploaded_file(request, pk): + """Delete an uploaded file and its DB record.""" + try: + file = UploadedFile.objects.get(pk=pk, user=request.user) + file.file.delete(save=False) + file.delete() + return Response(status=204) + except UploadedFile.DoesNotExist: + return Response({'error': 'File not found'}, status=404) + except Exception as e: + return Response({'error': str(e)}, status=500) + diff --git a/backend/gpt/migrations/0001_initial.py b/backend/gpt/migrations/0001_initial.py new file mode 100644 index 000000000..87ccbc3d3 --- /dev/null +++ b/backend/gpt/migrations/0001_initial.py @@ -0,0 +1,30 @@ +# Generated by Django 5.2.4 on 2025-08-01 05:48 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='Role', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=255)), + ], + ), + migrations.CreateModel( + name='Permission', + fields=[ + 
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=255)), + ('role', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='gpt.role')), + ], + ), + ] diff --git a/backend/gpt/migrations/0002_alter_permission_role_somemodel.py b/backend/gpt/migrations/0002_alter_permission_role_somemodel.py new file mode 100644 index 000000000..7c85f8930 --- /dev/null +++ b/backend/gpt/migrations/0002_alter_permission_role_somemodel.py @@ -0,0 +1,28 @@ +# Generated by Django 5.2.4 on 2025-08-01 07:05 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('gpt', '0001_initial'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.AlterField( + model_name='permission', + name='role', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='permissions', to='gpt.role'), + ), + migrations.CreateModel( + name='SomeModel', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + ), + ] diff --git a/backend/gpt/migrations/__init__.py b/backend/gpt/migrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/gpt/models.py b/backend/gpt/models.py new file mode 100644 index 000000000..c05956157 --- /dev/null +++ b/backend/gpt/models.py @@ -0,0 +1,20 @@ +from django.db import models +from django.contrib.auth import get_user_model + +class Role(models.Model): + name = models.CharField(max_length=255) # e.g., Admin, User + + def __str__(self): + return self.name + +class Permission(models.Model): + name = models.CharField(max_length=255) # e.g., can_edit, can_delete + role = models.ForeignKey(Role, 
on_delete=models.CASCADE, related_name='permissions') + + def __str__(self): + return f"{self.name} - {self.role}" + +User = get_user_model() + +class SomeModel(models.Model): + user = models.ForeignKey(User, on_delete=models.CASCADE) diff --git a/backend/gpt/permissions.py b/backend/gpt/permissions.py new file mode 100644 index 000000000..6bf7df121 --- /dev/null +++ b/backend/gpt/permissions.py @@ -0,0 +1,41 @@ +from rest_framework import permissions +import logging +from .models import Permission + + +logger = logging.getLogger(__name__) + +class HasRolePermission(permissions.BasePermission): + def has_permission(self, request, view): + perm_name = getattr(view, 'permission_name', None) + if not perm_name: + logger.debug("No permission_name on view") + return False + + try: + permission = Permission.objects.get(name=perm_name) + user_role = getattr(request.user, 'role', None) + + logger.debug(f"User role: {user_role}, Permission role: {permission.role}") + + if user_role is None: + logger.debug("User has no role assigned") + return False + + if user_role == permission.role: + return True + else: + logger.debug("User role does not match permission role") + return False + + except Permission.DoesNotExist: + logger.debug(f"Permission {perm_name} does not exist") + return False + except Exception as e: + logger.exception("Unexpected error in permission check") + return False + +from django.contrib.auth import get_user_model + +User = get_user_model() + diff --git a/backend/gpt/tests.py b/backend/gpt/tests.py new file mode 100644 index 000000000..fbfa536a7 --- /dev/null +++ b/backend/gpt/tests.py @@ -0,0 +1,138 @@ +from django.test import TestCase +from django.contrib.auth.models import User +from rest_framework.test import APIClient +from django.urls import reverse +from unittest.mock import patch +from gpt.models import Role, Permission + +class GeminiFeatureTests(TestCase): + def setUp(self): + # Setup test user, role, permission + self.client = APIClient() + 
self.user = User.objects.create_user(username='testuser', password='testpass') + self.client.force_authenticate(user=self.user) + self.role = Role.objects.create(name='Uploader') + self.permission = Permission.objects.create(name='file_upload', role=self.role) + self.user.role = self.role + self.user.save() + + def test_rag_generate_success(self): + # Valid query returns 200 + url = reverse('gemini:rag_generate') + response = self.client.post(url, {'query': 'Test query'}, format='json') + self.assertEqual(response.status_code, 200) + self.assertIn('answer', response.data) + self.assertIn('retrieved_documents', response.data) + + def test_rag_generate_missing_query(self): + # Missing query returns 400 + url = reverse('gemini:rag_generate') + response = self.client.post(url, {}, format='json') + self.assertEqual(response.status_code, 400) + self.assertIn('error', response.data) + + def test_file_upload_no_file(self): + # No file returns 400 + url = reverse('gemini:file_upload') + response = self.client.post(url, {}, format='multipart') + self.assertEqual(response.status_code, 400) + self.assertIn('error', response.data) + + @patch('gpt.views.process_file') + def test_file_upload_success(self, mock_process_file): + # Valid file upload returns 200 + url = reverse('gemini:file_upload') + file = {'file': ('test.txt', b'dummy content', 'text/plain')} + response = self.client.post(url, file, format='multipart') + self.assertEqual(response.status_code, 200) + self.assertIn('file_url', response.data) + mock_process_file.assert_called_once() + + def test_file_upload_with_role_permission_denied(self): + # User with no role gets 403 + self.user.role = None + self.user.save() + url = reverse('gemini:file_upload_with_role_permission') + file = {'file': ('test.txt', b'dummy content', 'text/plain')} + response = self.client.post(url, file, format='multipart') + self.assertEqual(response.status_code, 403) + + @patch('gpt.views.process_file') + def 
test_file_upload_with_role_permission_success(self, mock_process_file): + # Valid upload with role returns 200 + url = reverse('gemini:file_upload_with_role_permission') + file = {'file': ('test.txt', b'dummy content', 'text/plain')} + response = self.client.post(url, file, format='multipart') + self.assertEqual(response.status_code, 200) + self.assertIn('file_url', response.data) + mock_process_file.assert_called_once() + + def test_file_access_with_permission(self): + # Access allowed with role + url = reverse('gemini:file_access') + response = self.client.get(url) + self.assertEqual(response.status_code, 200) + self.assertIn('message', response.data) + + def test_file_access_without_permission(self): + # Access denied without role + self.user.role = None + self.user.save() + url = reverse('gemini:file_access') + response = self.client.get(url) + self.assertEqual(response.status_code, 403) + + def test_file_delete_missing_filename(self): + # Missing filename returns 400 + url = reverse('gemini:file_delete') + response = self.client.delete(url, {}, format='json') + self.assertEqual(response.status_code, 400) + self.assertIn('error', response.data) + + @patch('django.core.files.storage.FileSystemStorage.delete') + def test_file_delete_success(self, mock_delete): + # Valid delete returns 200 + url = reverse('gemini:file_delete') + mock_delete.return_value = None + response = self.client.delete(url, {'filename': 'test.txt'}, format='json') + self.assertEqual(response.status_code, 200) + self.assertIn('message', response.data) + mock_delete.assert_called_once_with('test.txt') + + @patch('django.core.files.storage.FileSystemStorage.delete') + def test_file_delete_failure(self, mock_delete): + # Delete error returns 500 + url = reverse('gemini:file_delete') + mock_delete.side_effect = Exception('delete failed') + response = self.client.delete(url, {'filename': 'test.txt'}, format='json') + self.assertEqual(response.status_code, 500) + self.assertIn('error', 
response.data) + + def test_conversation_summary_missing_id(self): + # Missing ID returns 400 + url = reverse('gemini:conversation_summary') + response = self.client.get(url) + self.assertEqual(response.status_code, 400) + self.assertIn('error', response.data) + + @patch('chat.utils.summarizer.generate_summary') + def test_conversation_summary_cache_and_generate(self, mock_generate_summary): + # First call generates and caches summary + from chat.models import Message, Version + version = Version.objects.create(conversation_id=1) + Message.objects.create(version=version, content="Hello", created_at="2025-07-30T10:00:00Z") + Message.objects.create(version=version, content="World", created_at="2025-07-30T10:01:00Z") + mock_generate_summary.return_value = "Summary of conversation" + url = reverse('gemini:conversation_summary') + '?conversation_id=1' + + response1 = self.client.get(url) + self.assertEqual(response1.status_code, 200) + self.assertEqual(response1.data['summary'], "Summary of conversation") + mock_generate_summary.assert_called_once() + + # Second call uses cached summary + mock_generate_summary.reset_mock() + response2 = self.client.get(url) + self.assertEqual(response2.status_code, 200) + self.assertEqual(response2.data['summary'], "Summary of conversation") + mock_generate_summary.assert_not_called() \ No newline at end of file diff --git a/backend/gpt/urls.py b/backend/gpt/urls.py index f4a0f6045..951987f1a 100644 --- a/backend/gpt/urls.py +++ b/backend/gpt/urls.py @@ -1,10 +1,14 @@ from django.urls import path - -from gpt import views +from . 
import views
 
 urlpatterns = [
-    path("", views.gpt_root_view),
-    path("title/", views.get_title),
-    path("question/", views.get_answer),
-    path("conversation/", views.get_conversation),
+    # Every route is named after its view so it can be resolved with reverse().
+    # NOTE(review): the tests in this change reverse names such as
+    # 'gemini:file_access'; the 'gemini' namespace has to be supplied by the
+    # including URLconf / app_name, which is not visible here -- confirm.
+    path('', views.gemini_root_view, name='gemini_root_view'),  # Root endpoint
+    path('title/', views.get_title, name='get_title'),  # Generate short title from chat
+    path('answer/', views.get_answer, name='get_answer'),  # Stream an answer from Gemini
+    path('conversation/', views.get_conversation, name='get_conversation'),  # Answer using the full conversation
+    # views.conversation_summary is defined in views.py but previously had no route.
+    path('conversation/summary/', views.conversation_summary, name='conversation_summary'),  # Cached conversation summary
+    path('rag/', views.rag_generate, name='rag_generate'),  # Retrieval-augmented generation
+    path('file/upload/', views.file_upload, name='file_upload'),  # Upload file (authentication only)
+    path('file/upload/secure/', views.file_upload_with_role_permission, name='file_upload_with_role_permission'),  # Secure file upload with role check
+    path('file/access/', views.file_access, name='file_access'),  # Access file with permission
+    path('file/delete/', views.file_delete, name='file_delete'),  # Delete uploaded file
 ]
diff --git a/backend/gpt/utils.py b/backend/gpt/utils.py
new file mode 100644
index 000000000..26ca8bd41
--- /dev/null
+++ b/backend/gpt/utils.py
@@ -0,0 +1,80 @@
+import os
+from dotenv import load_dotenv
+import google.generativeai as genai
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Get the Gemini API key from environment variables
+# (importing this module raises ValueError when the key is absent)
+api_key = os.getenv("GEMINI_API_KEY")
+if not api_key:
+    raise ValueError("GEMINI_API_KEY not set in environment variables")
+
+# Initialize Gemini client
+genai.configure(api_key=api_key)
+
+# Default chat parameters for Gemini model
+GEMINI_PARAMS = dict(
+    temperature=0.7,
+    top_p=0.95,
+)
+
+# Model selection
+MODEL = "gemini-1.5-flash"
+
+def retrieve_documents(query):
+    """
+    Dummy document retrieval function.
+    Replace this with your actual retrieval logic.
+    Returns a list of document strings related to the query.
+    """
+    return [f"Document content related to query: {query}"]
+
+def generate_response(query, context):
+    """
+    Generate a response using Gemini's generate content API
+    based on the user query and retrieved context.
+
+    Args:
+        query (str): User's question.
+        context (str): Retrieved document/context relevant to the query.
+
+    Returns:
+        str: Generated answer from the model.
+    """
+    prompt = (
+        f"Answer the question based on the following context:\n"
+        f"Context: {context}\n"
+        f"Question: {query}"
+    )
+
+    # A GenerativeModel is created on every call; the sampling settings come from
+    # the module-level GEMINI_PARAMS.
+    model = genai.GenerativeModel(MODEL)
+    response = model.generate_content(prompt, generation_config=genai.GenerationConfig(**GEMINI_PARAMS))
+
+    return response.text
+
+def process_file(file_url):
+    """
+    Placeholder function to process a file given by URL.
+    Replace this with your real file fetching and text extraction logic.
+
+    Args:
+        file_url (str): URL or path to the file.
+
+    Returns:
+        str: Extracted text content from the file.
+    """
+    # Placeholder behaviour: nothing is fetched or parsed here; the returned
+    # string is built from file_url alone.
+    print(f"Processing file: {file_url}")
+    # TODO: Add real file download and text extraction here
+    return f"Extracted text content from {file_url}"
+
+# Example usage (remove or comment out in production)
+# Runs only when the module is executed directly, not on import.
+if __name__ == "__main__":
+    query = "What is Retrieval-Augmented Generation?"
+    docs = retrieve_documents(query)
+    combined_context = " ".join(docs)
+    answer = generate_response(query, combined_context)
+    print("Generated Answer:", answer)
+
+    file_content = process_file("https://example.com/sample.pdf")
+    print("File Content:", file_content)
\ No newline at end of file
diff --git a/backend/gpt/views.py b/backend/gpt/views.py
index e9c81cb2e..4083f0477 100644
--- a/backend/gpt/views.py
+++ b/backend/gpt/views.py
@@ -1,34 +1,213 @@
-from django.contrib.auth.decorators import login_required
+import logging
+import os
+from django.core.files.storage import FileSystemStorage
+from django.core.cache import cache
 from django.http import JsonResponse, StreamingHttpResponse
-from rest_framework.decorators import api_view
+from rest_framework.decorators import api_view, permission_classes
+from rest_framework.permissions import IsAuthenticated
+from rest_framework.response import Response
+import google.generativeai as genai
 
-from src.utils.gpt import get_conversation_answer, 
get_gpt_title, get_simple_answer
+from chat.utils.summarizer import generate_summary
+from chat.models import Message
+from .utils import process_file
+from .permissions import HasRolePermission
 
+logger = logging.getLogger(__name__)
+# Configured at import time; os.getenv returns None when the variable is unset.
+genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 
+# Health check endpoint
 @api_view(["GET"])
-def gpt_root_view(request):
-    return JsonResponse({"message": "GPT endpoint works!"})
+def gemini_root_view(request):
+    """Return a static JSON payload confirming the endpoint is reachable."""
+    return JsonResponse({"message": "Gemini endpoint works!"})
 
 
-@login_required
+# Generate title from user question and chatbot response
 @api_view(["POST"])
+@permission_classes([IsAuthenticated])
 def get_title(request):
-    data = request.data
-    title = get_gpt_title(data["user_question"], data["chatbot_response"])
-    return JsonResponse({"content": title})
+    """Generate a conversation title with Gemini.
+
+    Reads `user_question` and `chatbot_response` from the request body and
+    responds 400 if either is missing or empty. On success the model text is
+    returned under the "content" key; any exception is logged and mapped to 500.
+    """
+    try:
+        user_q = request.data.get("user_question")
+        bot_res = request.data.get("chatbot_response")
+        if not user_q or not bot_res:
+            return Response({"error": "Missing user_question or chatbot_response."}, status=400)
+        model = genai.GenerativeModel('gemini-1.5-flash')
+        prompt = f"Generate a title for the following conversation: {user_q} {bot_res}"
+        response = model.generate_content(prompt)
+        return JsonResponse({"content": response.text})
+    except Exception as e:
+        logger.exception("Error generating title")
+        return Response({"error": "Failed to generate title."}, status=500)
 
 
-@login_required
+# Simple Gemini prompt/response
 @api_view(["POST"])
+@permission_classes([IsAuthenticated])
 def get_answer(request):
-    data = request.data
-    return StreamingHttpResponse(get_simple_answer(data["user_question"], stream=True), content_type="text/html")
+    """Stream a Gemini answer for `user_question` taken from the request body.
+
+    Responds 400 when the field is missing or empty; otherwise the text of each
+    streamed chunk is forwarded to the client.
+    """
+    try:
+        prompt = request.data.get("user_question")
+        if not prompt:
+            return Response({"error": "Missing user_question."}, status=400)
+        model = genai.GenerativeModel('gemini-1.5-flash')
+        response = model.generate_content(prompt, stream=True)
 
+        # Generator body runs lazily while the response is being streamed, i.e.
+        # after this view has returned, so errors raised here are not handled
+        # by the enclosing try/except.
+        def stream_response():
+            for chunk in response:
+                yield chunk.text
 
-@login_required
+
+        return StreamingHttpResponse(stream_response(), content_type="text/html")
+    except Exception as e:
+        logger.exception("Error generating answer")
+        return Response({"error": "Failed to generate answer."}, status=500)
+
+
+# Handle full conversation context with Gemini
 @api_view(["POST"])
+@permission_classes([IsAuthenticated])
 def get_conversation(request):
-    data = request.data
-    return StreamingHttpResponse(
-        get_conversation_answer(data["conversation"], data["model"], stream=True), content_type="text/html"
-    )
+    """Stream a Gemini answer for the whole `conversation` in the request body.
+
+    `conversation` may be a single string, a list of strings, or a list of
+    {"role": ..., "content": ...} dicts (the shape the removed implementation
+    forwarded). Responds 400 when it is missing or empty; failures while
+    building the request are logged and mapped to 500.
+    """
+    try:
+        conversation = request.data.get("conversation")
+        if not conversation:
+            return Response({"error": "Missing conversation."}, status=400)
+        model = genai.GenerativeModel('gemini-1.5-flash')
+        if isinstance(conversation, str):
+            prompt = conversation
+        else:
+            # str.join() raises TypeError on dict entries, so render each message
+            # to text first; plain strings pass through unchanged.
+            prompt = "\n".join(
+                f"{msg.get('role', 'user')}: {msg.get('content', '')}" if isinstance(msg, dict) else str(msg)
+                for msg in conversation
+            )
+        response = model.generate_content(prompt, stream=True)
+
+        # Consumed lazily by StreamingHttpResponse after this view has returned.
+        def stream_response():
+            for chunk in response:
+                yield chunk.text
+
+        return StreamingHttpResponse(stream_response(), content_type="text/html")
+    except Exception as e:
+        logger.exception("Error generating conversation answer")
+        return Response({"error": "Failed to generate conversation answer."}, status=500)
+
+
+# Dummy document retriever (to be replaced)
+def retrieve_documents(query):
+    """Return a one-element list holding a fixed placeholder string; `query` is ignored."""
+    return ["Document content related to query..."]
+
+
+# Dummy response generator using retrieved docs
+def generate_response(query, context):
+    """Return a canned answer string that embeds `query`; `context` is ignored."""
+    return f"Answer based on '{query}' with context."
+
+
+# RAG: combine retrieval + Gemini answer
+@api_view(['POST'])
+@permission_classes([IsAuthenticated])
+def rag_generate(request):
+    """Answer `query` with Gemini, using retrieved documents as extra prompt text.
+
+    Responds 400 when `query` is missing or empty. Otherwise the documents from
+    retrieve_documents() are space-joined, appended to the query, and sent to
+    the model; the response carries the query, the answer text and the
+    retrieved documents. Any exception is logged and mapped to 500.
+    """
+    query = request.data.get('query')
+    if not query:
+        return Response({"error": "Query is required."}, status=400)
+    try:
+        docs = retrieve_documents(query)
+        context = " ".join(docs)
+        model = genai.GenerativeModel('gemini-1.5-flash')
+        prompt = f"{query} {context}"
+        response = model.generate_content(prompt)
+        return Response({
+            "query": query,
+            "answer": response.text,
+            "retrieved_documents": docs,
+        })
+    except Exception as e:
+        logger.exception("RAG generation failed")
+        return Response({"error": "Failed to generate RAG answer."}, status=500)
+
+
+# Basic file upload: requires authentication but no role permission check
+@api_view(['POST'])
+@permission_classes([IsAuthenticated])
+def file_upload(request):
+    """Store the uploaded `file` part and return its storage URL.
+
+    Responds 400 when no `file` part is present. Otherwise the file is saved
+    with FileSystemStorage.save(file.name, ...), the URL of the stored name is
+    passed to process_file(), and that URL is returned under "file_url". Any
+    exception is logged and mapped to 500.
+    """
+    file = request.FILES.get('file')
+    if not file:
+        return Response({"error": "File is required."}, status=400)
+    try:
+        fs = FileSystemStorage()
+        filename = fs.save(file.name, file)
+        file_url = fs.url(filename)
+        process_file(file_url)
+        logger.info(f"File uploaded: {file.name}")
+        return Response({"file_url": file_url})
+    except Exception as e:
+        logger.exception("File upload failed")
+        return Response({"error": "Failed to upload file."}, status=500)
+
+
+# Secure file upload (requires role permission)
+@api_view(['POST'])
+@permission_classes([IsAuthenticated, HasRolePermission])
+def file_upload_with_role_permission(request):
+    """Same flow as file_upload, additionally gated by HasRolePermission.
+
+    HasRolePermission is imported from .permissions and not shown here; how it
+    decides is not visible from this file.
+    """
+    file = request.FILES.get('file')
+    if not file:
+        return Response({"error": "File is required."}, status=400)
+    try:
+        fs = FileSystemStorage()
+        filename = fs.save(file.name, file)
+        file_url = fs.url(filename)
+        process_file(file_url)
+        logger.info(f"File uploaded with permission: {file.name}")
+        return Response({"file_url": file_url})
+    except Exception as e:
+        logger.exception("File upload with permission failed")
+        return Response({"error": "Failed to upload file."}, status=500)
+
+# NOTE(review): this attribute lands on the function object returned by
+# @api_view, while DRF hands the view *instance* to has_permission(); confirm
+# that HasRolePermission (in .permissions, not shown) can actually read it.
+file_upload_with_role_permission.permission_name = "file_upload"
+
+
+# Dummy file access (with role permission)
+@api_view(['GET'])
+@permission_classes([IsAuthenticated, HasRolePermission])
+def file_access(request):
+    """Log the requesting user and return a fixed confirmation message.

+    No file is read or listed here; the view only exercises the permission check.
+    """
+    try:
+        logger.info(f"Files accessed by user: {request.user}")
+        return Response({"message": "Files accessed"})
+    except Exception as e:
+        logger.exception("File access failed")
+        return Response({"error": "Failed to access files."}, status=500)
+
+file_access.permission_name = "file_access"
+
+
+# Delete uploaded file by name
+@api_view(['DELETE'])
+@permission_classes([IsAuthenticated, HasRolePermission])
+def file_delete(request):
+    """Delete the stored file named by `filename` in the request body.
+
+    Responds 400 when `filename` is missing or empty. Any exception raised by
+    FileSystemStorage.delete() is logged and mapped to 500.
+    """
+    file_name = request.data.get("filename")
+    if not file_name:
+        return Response({"error": "Filename required."}, status=400)
+    fs = FileSystemStorage()
+    try:
+        fs.delete(file_name)
+        logger.info(f"File deleted: {file_name}")
+        return Response({"message": f"{file_name} deleted."})
+    except Exception as e:
+        logger.exception("File deletion failed")
+        return Response({"error": "Failed to delete file."}, status=500)
+
+file_delete.permission_name = "file_delete"
+
+
+# Summarize conversation messages by conversation_id
+@api_view(['GET'])
+@permission_classes([IsAuthenticated])
+def conversation_summary(request):
+    """Return a summary of a conversation's messages, cached for 15 minutes.
+
+    Responds 400 without `conversation_id` and 404 when the conversation has no
+    messages. On a cache miss the message contents are space-joined in
+    `created_at` order and passed to generate_summary().
+    NOTE(review): nothing here checks that the conversation belongs to
+    request.user -- confirm whether that is intended.
+    """
+    conversation_id = request.GET.get('conversation_id')
+    if not conversation_id:
+        return Response({"error": "conversation_id is required"}, status=400)
+    try:
+        # Namespace the key: the raw query parameter would otherwise address any
+        # entry in the shared cache (collisions, and reads of unrelated values).
+        cache_key = f"conversation_summary:{conversation_id}"
+        summary = cache.get(cache_key)
+        if summary is None:
+            messages = Message.objects.filter(
+                version__conversation_id=conversation_id
+            ).order_by("created_at")
+            if not messages.exists():
+                return Response({"error": "No messages found."}, status=404)
+            full_text = " ".join(msg.content for msg in messages)
+            summary = generate_summary(full_text)
+            cache.set(cache_key, summary, timeout=60 * 15)
+        return Response({"summary": summary})
+    except Exception as e:
+        logger.exception("Summary generation failed")
+        return Response({"error": "Failed to 
generate summary."}, status=500)
diff --git a/backend/src/utils/gpt.py b/backend/src/utils/gpt.py
index f8a4aa023..cb17f7de4 100644
--- a/backend/src/utils/gpt.py
+++ b/backend/src/utils/gpt.py
@@ -1,77 +1,95 @@
 from dataclasses import dataclass
+from openai import OpenAI
+from dotenv import load_dotenv
+import os
 
-from src.libs import openai
+
+# Load .env file (from project root)
+# override=True lets values from .env replace variables already set in the environment.
+load_dotenv(override=True)
+
+# Initialize OpenAI client with API key from environment
+client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+# Default chat parameters
+# `stream` is no longer part of the defaults; callers that stream add it themselves.
 
 GPT_40_PARAMS = dict(
     temperature=0.7,
     top_p=0.95,
     frequency_penalty=0,
     presence_penalty=0,
     stop=None,
-    stream=False,
 )
 
-
+# GPT model mapping (ONLY valid models as of 2025)
 @dataclass
 class GPTVersion:
+    """Pairs an internal version key with the model identifier passed to the OpenAI API."""
     name: str
-    engine: str
-
+    model: str
 
 GPT_VERSIONS = {
-    "gpt35": GPTVersion("gpt35", "gpt-35-turbo-0613"),
-    "gpt35-16k": GPTVersion("gpt35-16k", "gpt-35-turbo-16k"),
-    "gpt4": GPTVersion("gpt4", "gpt-4-0613"),
-    "gpt4-32k": GPTVersion("gpt4-32k", "gpt4-32k-0613"),
+    "gpt35": GPTVersion("gpt35", "gpt-3.5-turbo"),
+    "gpt4": GPTVersion("gpt4", "gpt-4"),
 }
 
-
+# Function: simple chat prompt
 def get_simple_answer(prompt: str, stream: bool = True):
-    kwargs = {**GPT_40_PARAMS, **dict(stream=stream)}
-
-    for resp in openai.ChatCompletion.create(
-        engine=GPT_VERSIONS["gpt35"].engine,
-        messages=[{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": prompt}],
+    # Always uses the "gpt35" entry of GPT_VERSIONS; `stream` is merged into the
+    # shared defaults for this call only.
+    kwargs = {**GPT_40_PARAMS, "stream": stream}
+    response = client.chat.completions.create(
+        model=GPT_VERSIONS["gpt35"].model,
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": prompt}
+        ],
         **kwargs,
-    ):
-        choices = resp.get("choices", [])
-        if not choices:
-            continue
-        chunk = choices.pop()["delta"].get("content")
-        if chunk:
-            yield chunk
-
-
-def get_gpt_title(prompt: str, response: str):
-    sys_msg: str = (
-        "As an AI Assistant your goal is to make very short title, few 
words max for a conversation between user and "
-        "chatbot. You will be given the user's question and chatbot's first response and you will return only the "
-        "resulting title. Always return some raw title and nothing more."
     )
-    usr_msg = f'user_question: "{prompt}"\n' f'chatbot_response: "{response}"'
-
-    response = openai.ChatCompletion.create(
-        engine=GPT_VERSIONS["gpt35"].engine,
-        messages=[{"role": "system", "content": sys_msg}, {"role": "user", "content": usr_msg}],
-        **GPT_40_PARAMS,
+    if stream:
+        for chunk in response:
+            # Skip chunks without choices, as the removed implementation did.
+            if not chunk.choices:
+                continue
+            # The v1 client returns typed objects: `delta` exposes `.content`
+            # and has no dict-style .get().
+            content = chunk.choices[0].delta.content
+            if content:
+                yield content
+    else:
+        # This function contains `yield`, so it is always a generator; a
+        # `return value` would be hidden inside StopIteration. Yield the whole
+        # message once so callers can iterate in both modes.
+        yield response.choices[0].message.content
+
+# Function: generate a short title from prompt & response
+def get_gpt_title(prompt: str, response: str):
+    """Ask the "gpt35" model for a short title for a question/answer pair.
+
+    Returns the model text stripped of surrounding whitespace and double
+    quotes. Never raises: on any exception a fixed message string is returned
+    instead (a quota-specific one when the error text mentions rate limits or
+    quota), so callers cannot tell a failure from a title by type.
+    """
+    system_msg = (
+        "As an AI Assistant your goal is to make a very short title, a few words max, "
+        "for a conversation between user and chatbot. You will be given the user's question "
+        "and chatbot's first response. Return only the resulting title — raw, no formatting."
     )
-
-    result = response["choices"][0]["message"]["content"].replace('"', "")
-    return result
-
-
+    user_msg = f'user_question: "{prompt}"\nchatbot_response: "{response}"'
+
+    try:
+        result = client.chat.completions.create(
+            model=GPT_VERSIONS["gpt35"].model,
+            messages=[
+                {"role": "system", "content": system_msg},
+                {"role": "user", "content": user_msg}
+            ],
+            **GPT_40_PARAMS,
+        )
+        return result.choices[0].message.content.strip().replace('"', "")
+
+    except Exception as e:
+        error_str = str(e).lower()
+        if "rate limit" in error_str or "quota" in error_str or "insufficient_quota" in error_str:
+            return "Quota exceeded, please try again later."
+        return "Failed to generate title."
+
+
+# Function: use full conversation context
 def get_conversation_answer(conversation: list[dict[str, str]], model: str, stream: bool = True):
-    kwargs = {**GPT_40_PARAMS, **dict(stream=stream)}
-    engine = GPT_VERSIONS[model].engine
-
-    for resp in openai.ChatCompletion.create(
-        engine=engine,
-        messages=[{"role": "system", "content": "You are a helpful assistant."}, *conversation],
+    """Yield the assistant's reply to `conversation` from the selected model.
+
+    Args:
+        conversation: Chat messages appended after a fixed system message.
+        model: Key into GPT_VERSIONS; an unknown key raises KeyError.
+        stream: When true, yield content pieces as they arrive; otherwise
+            yield the complete message once.
+    """
+    kwargs = {**GPT_40_PARAMS, "stream": stream}
+    selected_model = GPT_VERSIONS[model].model
+    response = client.chat.completions.create(
+        model=selected_model,
+        # Unpacking (as the removed code did) accepts any iterable, not only lists.
+        messages=[{"role": "system", "content": "You are a helpful assistant."}, *conversation],
         **kwargs,
-    ):
-        choices = resp.get("choices", [])
-        if not choices:
-            continue
-        chunk = choices.pop()["delta"].get("content")
-        if chunk:
-            yield chunk
+    )
+    if stream:
+        for chunk in response:
+            # Skip chunks without choices, as the removed implementation did.
+            if not chunk.choices:
+                continue
+            # The v1 client returns typed objects: `delta` exposes `.content`
+            # and has no dict-style .get().
+            content = chunk.choices[0].delta.content
+            if content:
+                yield content
+    else:
+        # A `return value` inside a generator is hidden in StopIteration; yield
+        # the whole message once so callers can iterate in both modes.
+        yield response.choices[0].message.content
diff --git a/backend/uploads/Python-Interview-Questions-Answers-Free-PDF.pdf b/backend/uploads/Python-Interview-Questions-Answers-Free-PDF.pdf
new file mode 100644
index 000000000..ed8a9c189
Binary files /dev/null and b/backend/uploads/Python-Interview-Questions-Answers-Free-PDF.pdf differ
diff --git a/backend/uploads/Python-Interview-Questions-Answers-Free-PDF_RRl5l2W.pdf b/backend/uploads/Python-Interview-Questions-Answers-Free-PDF_RRl5l2W.pdf
new file mode 100644
index 000000000..ed8a9c189
Binary files /dev/null and b/backend/uploads/Python-Interview-Questions-Answers-Free-PDF_RRl5l2W.pdf differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000..5c77857e1
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+# NOTE(review): a migration in this change is marked "Generated by Django 5.2.4";
+# confirm that the <4.0 pin below is still intended.
+Django>=3.2,<4.0
+djangorestframework
+transformers
+torch
+chromadb==0.4.22
+python-dotenv
+django-cors-headers
+psycopg2-binary
+django-celery-beat
+
+