"""
Data migration to populate the new CourseRun and CatalogCourse models.
"""

# Generated by Django 5.2.11 on 2026-02-13 21:47
#
# NOTE: the change set that introduced this migration also adds "openedx_catalog" (commented as
# "Core models to represent courses") to the app lists in cms/envs/common.py and lms/envs/common.py,
# right after 'openedx_events' and before "openedx_content". This migration depends on that app.
import logging

from django.conf import settings
from django.db import migrations
from organizations.api import ensure_organization, exceptions as org_exceptions

log = logging.getLogger(__name__)


def _normalize_language(raw_language, default_language, course_id):
    """
    Return a cleaned-up language code for a course, falling back to ``default_language``.

    Arguments:
        raw_language: the language stored on the CourseOverview (may be ``None`` or empty).
        default_language: the value to use when ``raw_language`` is missing or looks invalid.
        course_id: the course key; only used to make the warning message useful.

    The value is lower-cased and an underscore in the third position is replaced with a hyphen
    (``en_us`` -> ``en-us``). Any other value longer than two characters whose third character is
    not ``-`` or ``@`` is treated as invalid: a warning is logged and the default is returned.
    """
    if not raw_language:
        return default_language

    language = raw_language.lower()
    if len(language) > 2 and language[2] == "_":
        # Ensure we use hyphens for consistency (`en-us` not `en_us`).
        # Strings are immutable, so build a new one rather than assigning to language[2].
        language = f"{language[:2]}-{language[3:]}"
    if len(language) > 2 and language[2] not in ("-", "@"):
        # This seems like an invalid value; revert to the default:
        log.warning(
            'The course with ID "%s" has invalid language "%s" - using default language "%s" instead.',
            course_id,
            language,
            default_language,
        )
        return default_language
    return language


def backfill_openedx_catalog(apps, schema_editor):
    """
    Populate the new CourseRun and CatalogCourse models.

    For every row of SplitModulestoreCourseIndex with ``base_store="mongodb"`` and an empty
    ``library_version``, this ensures that an Organization, a CatalogCourse and a CourseRun exist,
    taking the display name, language and "created" timestamp from the matching CourseOverview
    when there is one.

    Arguments:
        apps: the historical app registry passed in by ``migrations.RunPython``.
        schema_editor: passed in by ``migrations.RunPython``; not used here.

    Raises:
        ValueError: if an organization is missing and cannot be auto-created, or if a course code
            differs from the existing CatalogCourse's ``course_code`` (e.g. in capitalization).
    """
    # CourseOverview is a cache model derived from modulestore; modulestore is the source of truth for courses, so
    # we'll use it to get the list of "all courses on the system" to populate the new CourseRun and CatalogCourse
    # models.
    CourseIndex = apps.get_model("split_modulestore_django", "SplitModulestoreCourseIndex")
    CourseOverview = apps.get_model("course_overviews", "CourseOverview")
    CatalogCourse = apps.get_model("openedx_catalog", "CatalogCourse")
    CourseRun = apps.get_model("openedx_catalog", "CourseRun")

    # Primary keys of the CatalogCourse rows created by this run of the migration. Only those rows may have
    # their display_name / created values adjusted as further runs of the same course are processed.
    created_catalog_course_ids: set[int] = set()
    all_course_runs = CourseIndex.objects.filter(base_store="mongodb", library_version="").order_by("course_id")
    for course_run in all_course_runs:
        org_code: str = course_run.course_id.org
        course_code: str = course_run.course_id.course
        run_code: str = course_run.course_id.run

        # Ensure that the Organization exists.
        try:
            org_data = ensure_organization(org_code)
        except org_exceptions.InvalidOrganizationException as exc:
            # Note: IFF the org exists among the modulestore courses but not in the Organizations database table,
            # and if auto-create is disabled (it's enabled by default), this will raise
            # InvalidOrganizationException. It would be up to the operator to decide how they want to resolve that.
            raise ValueError(
                f'The organization short code "{org_code}" exists in modulestore ({course_run.course_id}) but '
                "not the Organizations table, and auto-creating organizations is disabled. You can resolve this by "
                "creating the Organization manually (e.g. from the Django admin) or turning on auto-creation. "
                "You can set active=False to prevent this Organization from being used other than for historical data. "
            ) from exc
        if org_data["short_name"] != org_code:
            # On most installations, the 'short_name' database column is case insensitive (unfortunately)
            log.warning(
                'The course with ID "%s" does not match its Organization.short_code "%s"',
                course_run.course_id,
                org_data["short_name"],
            )

        # Fetch the CourseOverview if it exists
        try:
            course_overview = CourseOverview.objects.get(id=course_run.course_id)
        except CourseOverview.DoesNotExist:
            course_overview = None  # Course exists in modulestore but details aren't cached into CourseOverview yet
        display_name: str = (course_overview.display_name if course_overview else None) or course_code

        # Determine the course language.
        language = _normalize_language(
            course_overview.language if course_overview else None,
            settings.LANGUAGE_CODE,
            course_run.course_id,
        )

        # Ensure that the CatalogCourse exists.
        cc, cc_created = CatalogCourse.objects.get_or_create(
            org_id=org_data["id"],
            course_code=course_code,
            defaults={
                "display_name": display_name,
                "language": language,
            },
        )
        if cc_created:
            created_catalog_course_ids.add(cc.pk)
        elif cc.pk in created_catalog_course_ids:
            # This CatalogCourse was previously created during this same migration
            # Check if all the runs have the same display_name:
            if (
                course_overview
                and course_overview.display_name
                and course_overview.display_name != cc.display_name
                and cc.display_name != course_code
            ):
                # The runs have different names, so just use the course code as the common catalog course name.
                cc.display_name = course_code
                cc.save(update_fields=["display_name"])

        if cc.course_code != course_code:
            raise ValueError(
                f"The course {course_run.course_id} exists in modulestore with a different capitalization of its "
                f'course code compared to other instances of the same run ("{course_code}" vs "{cc.course_code}"). '
                'This really should not happen. To fix it, delete the inconsistent course runs (!). '
            )

        # Create the CourseRun
        new_run, run_created = CourseRun.objects.get_or_create(
            catalog_course=cc,
            run=run_code,
            course_id=course_run.course_id,
            defaults={"display_name": display_name},
        )

        # Correct the "created" timestamp. Since it has auto_now_add=True, we can't set its value except using
        # update(). The CourseOverview should have the "created" date unless it's missing or the course was created
        # before the CourseOverview model existed. In any case, it should be good enough. Otherwise use the
        # default (now).
        if course_overview:
            if course_overview.created < cc.created and cc.pk in created_catalog_course_ids:
                # Use the 'created' date from the oldest course run that we process.
                CatalogCourse.objects.filter(pk=cc.pk).update(created=course_overview.created)
            if run_created:
                CourseRun.objects.filter(pk=new_run.pk).update(created=course_overview.created)


class Migration(migrations.Migration):
    """
    Run the catalog backfill as a data migration; reversing it is a no-op.
    """

    dependencies = [
        ("openedx_catalog", "0001_initial"),
        ("course_overviews", "0029_alter_historicalcourseoverview_options"),
        ("split_modulestore_django", "0003_alter_historicalsplitmodulestorecourseindex_options"),
    ]

    operations = [
        migrations.RunPython(backfill_openedx_catalog, reverse_code=migrations.RunPython.noop),
    ]