Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cms/envs/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,9 @@ def make_lms_template_path(settings):

'openedx_events',

# Core models to represent courses
"openedx_catalog",

# Core apps that power libraries
"openedx_content",
*openedx_content_backcompat_apps_to_install(),
Expand Down
3 changes: 3 additions & 0 deletions lms/envs/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2020,6 +2020,9 @@

'openedx_events',

# Core models to represent courses
"openedx_catalog",

# Core apps that power libraries
"openedx_content",
*openedx_content_backcompat_apps_to_install(),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
"""
Data migration to populate the new CourseRun and CatalogCourse models.
"""

# Generated by Django 5.2.11 on 2026-02-13 21:47
import logging

from django.conf import settings
from django.db import migrations
from organizations.api import ensure_organization, exceptions as org_exceptions

log = logging.getLogger(__name__)


def backfill_openedx_catalog(apps, schema_editor):
    """
    Populate the new CourseRun and CatalogCourse models.

    Walks every course run listed in the split-modulestore course index and, for each one, makes sure that
    its Organization, a CatalogCourse (one per org + course code) and a CourseRun exist. Display name,
    language and "created" timestamp are taken from the matching CourseOverview when one is available.

    Args:
        apps: The migration app registry, used to load the historical model classes.
        schema_editor: Unused; present because ``RunPython`` callables receive it.

    Raises:
        ValueError: If ``ensure_organization`` rejects the course's organization code, or if the
            CatalogCourse matched for a run stores a course code that differs from the run's course code.
    """
    # CourseOverview is a cache model derived from modulestore; modulestore is the source of truth for courses, so
    # we'll use it to get the list of "all courses on the system" to populate the new CourseRun and CatalogCourse
    # models.
    CourseIndex = apps.get_model("split_modulestore_django", "SplitModulestoreCourseIndex")
    CourseOverview = apps.get_model("course_overviews", "CourseOverview")
    CatalogCourse = apps.get_model("openedx_catalog", "CatalogCourse")
    CourseRun = apps.get_model("openedx_catalog", "CourseRun")

    # Primary keys of the CatalogCourse rows created by this run of the migration. Rows that already existed
    # beforehand never have their display_name or "created" timestamp adjusted below.
    created_catalog_course_ids: set[int] = set()
    all_course_runs = CourseIndex.objects.filter(base_store="mongodb", library_version="").order_by("course_id")
    for course_run in all_course_runs:
        org_code: str = course_run.course_id.org
        course_code: str = course_run.course_id.course
        run_code: str = course_run.course_id.run

        # Ensure that the Organization exists.
        try:
            org_data = ensure_organization(org_code)
        except org_exceptions.InvalidOrganizationException as exc:
            # Note: IFF the org exists among the modulestore courses but not in the Organizations database table,
            # and if auto-create is disabled (it's enabled by default), this will raise
            # InvalidOrganizationException. It would be up to the operator to decide how they want to resolve that.
            # Chain the original exception so the underlying cause stays visible in the traceback.
            raise ValueError(
                f'The organization short code "{org_code}" exists in modulestore ({course_run.course_id}) but '
                "not the Organizations table, and auto-creating organizations is disabled. You can resolve this by "
                "creating the Organization manually (e.g. from the Django admin) or turning on auto-creation. "
                "You can set active=False to prevent this Organization from being used other than for historical data. "
            ) from exc
        if org_data["short_name"] != org_code:
            # On most installations, the Organization short name lookup is case insensitive (unfortunately), so
            # the stored value can differ from the org code used in the course ID.
            log.warning(
                'The course with ID "%s" does not match its Organization.short_code "%s"',
                course_run.course_id,
                org_data["short_name"],
            )

        # Fetch the CourseOverview if it exists
        try:
            course_overview = CourseOverview.objects.get(id=course_run.course_id)
        except CourseOverview.DoesNotExist:
            course_overview = None  # Course exists in modulestore but details aren't cached into CourseOverview yet
        # Fall back to the course code when there is no overview or its display name is empty.
        display_name: str = (course_overview.display_name if course_overview else None) or course_code

        # Determine the course language.
        language = settings.LANGUAGE_CODE
        if course_overview and course_overview.language:
            language = course_overview.language.lower()
            if len(language) > 2 and language[2] == "_":
                # Ensure we use hyphens for consistency (`en-us` not `en_us`). Strings are immutable, so the
                # separator is replaced by rebuilding the value rather than by item assignment.
                language = f"{language[:2]}-{language[3:]}"
            if len(language) > 2 and language[2] not in ("-", "@"):
                # This seems like an invalid value; revert to the default:
                log.warning(
                    'The course with ID "%s" has invalid language "%s" - using default language "%s" instead.',
                    course_run.course_id,
                    language,
                    settings.LANGUAGE_CODE,
                )
                language = settings.LANGUAGE_CODE

        # Ensure that the CatalogCourse exists.
        cc, cc_created = CatalogCourse.objects.get_or_create(
            org_id=org_data["id"],
            course_code=course_code,
            defaults={
                "display_name": display_name,
                "language": language,
            },
        )
        if cc_created:
            created_catalog_course_ids.add(cc.pk)
        elif cc.pk in created_catalog_course_ids:
            # This CatalogCourse was previously created during this same migration
            # Check if all the runs have the same display_name:
            if (
                course_overview
                and course_overview.display_name
                and course_overview.display_name != cc.display_name
                and cc.display_name != course_code
            ):
                # The runs have different names, so just use the course code as the common catalog course name.
                cc.display_name = course_code
                cc.save(update_fields=["display_name"])

        if cc.course_code != course_code:
            raise ValueError(
                f"The course {course_run.course_id} exists in modulestore with a different capitalization of its "
                f'course code compared to other instances of the same run ("{course_code}" vs "{cc.course_code}"). '
                'This really should not happen. To fix it, delete the inconsistent course runs (!). '
            )

        # Create the CourseRun
        new_run, run_created = CourseRun.objects.get_or_create(
            catalog_course=cc,
            run=run_code,
            course_id=course_run.course_id,
            defaults={"display_name": display_name},
        )

        # Correct the "created" timestamp. Since it has auto_now_add=True, we can't set its value except using
        # update(). The CourseOverview should have the "created" date unless it's missing or the course was created
        # before the CourseOverview model existed. In any case, it should be good enough. Otherwise use the default
        # (now).
        if course_overview:
            if course_overview.created < cc.created and cc.pk in created_catalog_course_ids:
                # Use the 'created' date from the oldest course run that we process.
                CatalogCourse.objects.filter(pk=cc.pk).update(created=course_overview.created)
            if run_created:
                CourseRun.objects.filter(pk=new_run.pk).update(created=course_overview.created)


class Migration(migrations.Migration):
    """
    Data migration that runs ``backfill_openedx_catalog`` to fill the openedx_catalog tables.
    """

    # The backfill loads models from each of these apps, so their listed migrations must be applied first.
    dependencies = [
        ("openedx_catalog", "0001_initial"),
        ("course_overviews", "0029_alter_historicalcourseoverview_options"),
        ("split_modulestore_django", "0003_alter_historicalsplitmodulestorecourseindex_options"),
    ]

    operations = [
        # Forward: populate the catalog rows. Reverse: do nothing (the backfilled rows are left in place).
        migrations.RunPython(
            code=backfill_openedx_catalog,
            reverse_code=migrations.RunPython.noop,
        ),
    ]
Loading