83 changes: 83 additions & 0 deletions codemeticulous/ai_convert.py
@@ -0,0 +1,83 @@
import json
import os

from litellm import completion

from codemeticulous.codemeta.models import CodeMeta
from codemeticulous.datacite.models import DataCite
from codemeticulous.cff.models import CitationFileFormat

STANDARDS = {
    "codemeta": {
        "model": CodeMeta,
        "format": "json",
    },
    "datacite": {
        "model": DataCite,
        "format": "json",
    },
    "cff": {
        "model": CitationFileFormat,
        "format": "yaml",
    },
}

def convert_ai(model: str, key: str, source_format: str, target_format: str, source_data):
    """
    Automate metadata standard conversion using an LLM and the canonical representation.

    Args:
        model: LLM model string (e.g., "openrouter/openai/gpt-4o")
        key: API key for the LLM provider
        source_format: string name of the source metadata standard
        target_format: string name of the target metadata standard
        source_data: dict or pydantic.BaseModel instance representing the source metadata
    """

    # Build prompt messages using the pydantic schemas and the source data
    source_model = STANDARDS[source_format]["model"]
    target_model = STANDARDS[target_format]["model"]

    # Coerce the source data into a validated pydantic model instance
    if isinstance(source_data, dict):
        source_instance = source_model(**source_data)
    elif isinstance(source_data, source_model):
        source_instance = source_data
    else:
        raise TypeError(
            f"source_data must be a dict or {source_model.__name__} instance, "
            f"got {type(source_data).__name__}"
        )

    messages = prompt_generator(source_instance, source_model, target_model)

    # FIXME: adjust configuration for LiteLLM's standard env var lookup
    os.environ["OPENROUTER_API_KEY"] = key

    # Call the LLM via litellm's completion function
    # TODO: add guardrails to validate the model string before attempting a completion call
    response = completion(
        model=model,
        messages=messages,
    )

    # Extract the assistant's text from the LLM response
    assistant_text = response.choices[0].message.content

    # TODO: parse and validate the response so a target model instance can be returned
    print("LLM response:", assistant_text)
    return assistant_text


def prompt_generator(source_instance, source_model, target_model) -> list:
    # System message: pin the model to strict, JSON-only conversion behaviour
    system = (
        "You are a metadata conversion assistant using strictly the source and target schema models. "
        "ALWAYS return JSON only. Do not include any explanatory text outside the JSON.\n"
        "If a source property cannot be mapped, add it to 'unmapped_properties' and explain in 'unmapped_explanations'.\n"
        "RESPONSE FORMAT (JSON only):\n"
        '{"converted": {...}, "unmapped_properties": ["property"], "unmapped_explanations": {"property": "reason"}}\n'
    )

    user = (  # User query carrying the data to convert plus both schemas
        "Convert the SOURCE_DATA to match TARGET_MODEL_SCHEMA.\n"
        "SOURCE_DATA:\n" + source_instance.model_dump_json() + "\n\n"
        "SOURCE_MODEL_SCHEMA:\n" + json.dumps(source_model.model_json_schema()) + "\n\n"
        "TARGET_MODEL_SCHEMA:\n" + json.dumps(target_model.model_json_schema()) + "\n\n"
    )

    return [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]
A collaborator commented on this hunk:
I bet context/docs (definitions of terms, etc.) for the schemas might help with results

we could put a description for all the fields in the pydantic models so that it would end up in the jsonschema from schema(), but this may be a lot of work, even with an initial pass of having AI fill them out from the docs below

alternatively, just throw in the full text of the documentation

https://codemeta.github.io/terms/
https://datacite-metadata-schema.readthedocs.io/en/4.6/
https://github.com/citation-file-format/citation-file-format/blob/main/schema-guide.md
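
A minimal sketch of the field-description idea, assuming pydantic v2 (which the project already pins): descriptions attached via Field() are carried into the generated JSON schema, so the LLM would see them alongside the property names. The model and fields below are illustrative, not the real codemeticulous models.

from pydantic import BaseModel, Field

class ExampleSource(BaseModel):
    # Illustrative only -- the real models live in codemeticulous.*.models
    name: str = Field(description="The name of the software")
    version: str | None = Field(default=None, description="The release version identifier")

# The description lands under the matching property in the JSON schema:
print(ExampleSource.model_json_schema()["properties"]["name"]["description"])
# -> The name of the software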

82 changes: 81 additions & 1 deletion codemeticulous/cli.py
@@ -5,7 +5,7 @@
import yaml

from codemeticulous.convert import STANDARDS, convert as _convert
from codemeticulous.ai_convert import convert_ai as _convert_ai

@click.group()
def cli():
@@ -137,3 +137,83 @@ def load_file_autodetect(file_path):
raise ValueError(f"Unsupported file extension: {ext}.")
except Exception as e:
raise ValueError(f"Failed to load file: {file_path}. {str(e)}")

@cli.command()
@click.option(
    "-m",
    "--model",
    "llm_model",
    type=str,
    required=True,
    help="LLM model to use for conversion (e.g., 'openrouter/openai/gpt-4o')",
)
@click.option(
    "-k",
    "--key",
    "api_key",
    type=str,
    required=True,
    help="API key for LLM authorization",
)
@click.option(
    "-f",
    "--from",
    "source_format",
    type=click.Choice(STANDARDS.keys()),
    required=True,
    help="Source format",
)
@click.option(
    "-t",
    "--to",
    "target_format",
    type=click.Choice(STANDARDS.keys()),
    required=True,
    help="Target format",
)
@click.option(
    "-o",
    "--output",
    "output_file",
    type=click.File("w"),
    default=None,
    help="Output file name (by default prints to stdout)",
)
@click.option(
    "-v",
    "--verbose",
    is_flag=True,
    default=False,
    help="Print verbose output",
)
@click.argument("input_file", type=click.Path(exists=True))
# NOTE: parameter names must match the click option destinations ("llm_model", "api_key")
def ai_convert(llm_model: str, api_key: str, source_format: str, target_format: str, input_file, output_file, verbose):
    try:
        input_data = load_file_autodetect(input_file)
    except Exception as e:
        click.echo(f"Failed to load file: {input_file}. {str(e)}", err=True)
        if verbose:
            traceback.print_exc()
        return

    try:
        converted_data = _convert_ai(llm_model, api_key, source_format, target_format, input_data)
    except Exception as e:
        click.echo(f"Error during AI-assisted conversion: {str(e)}", err=True)
        if verbose:
            traceback.print_exc()
        return

    output_format = STANDARDS[target_format]["format"]

    try:
        output_data = dump_data(converted_data, output_format)
    except Exception as e:
        click.echo(f"Error during serialization: {str(e)}", err=True)
        if verbose:
            traceback.print_exc()
        return

    if output_file:
        output_file.write(output_data)
        click.echo(f"Data written to {output_file.name}")
    else:
        click.echo(output_data)
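
For reference, an invocation of the new command might look like the following; this is illustrative and assumes the installed console script is named codemeticulous and that click 8 derives the subcommand name ai-convert from the function name:

codemeticulous ai-convert \
    -m openrouter/openai/gpt-4o \
    -k "$OPENROUTER_API_KEY" \
    -f codemeta -t cff \
    -o CITATION.cff codemeta.json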
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -6,6 +6,7 @@ readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"click>=8.1.7",
"litellm>=1.77.1",
"pydantic2-schemaorg==0.2.0",
"pydantic>=2.9.2",
"pyyaml>=6.0.2",
@@ -30,4 +31,4 @@ include-package-data = false

[tool.setuptools.packages.find]
include = ["codemeticulous", "codemeticulous.*"]
exclude = ["tests*", "schema"]
exclude = ["tests*", "schema"]