diff --git a/backend/app/bedrock.py b/backend/app/bedrock.py index c40de310e..56e04ed3d 100644 --- a/backend/app/bedrock.py +++ b/backend/app/bedrock.py @@ -325,26 +325,151 @@ def get_model_id( "amazon-nova-micro": "amazon.nova-micro-v1:0", } + # Made this list by scripts/cross_region_inference/get_supported_cross_region_inferences.py # Ref: https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-support.html - cross_region_inference_models = { - "claude-v3-sonnet", - "claude-v3-haiku", - "claude-v3-opus", - "claude-v3.5-sonnet", - "claude-v3.5-sonnet-v2", - "claude-v3.7-sonnet", - "claude-v3.5-haiku", - "amazon-nova-pro", - "amazon-nova-lite", - "amazon-nova-micro", - } - - supported_region_prefixes = { - "us-east-1": "us", - "us-west-2": "us", - "eu-west-1": "eu", - "eu-central-1": "eu", - "eu-west-3": "eu", + supported_regions = { + "us-east-1": { + "area": "us", + "models": [ + "amazon-nova-lite", + "amazon-nova-micro", + "amazon-nova-pro", + "claude-v3-haiku", + "claude-v3-opus", + "claude-v3-sonnet", + "claude-v3.5-haiku", + "claude-v3.5-sonnet", + "claude-v3.5-sonnet-v2", + "claude-v3.7-sonnet", + ], + }, + "us-east-2": { + "area": "us", + "models": [ + "amazon-nova-lite", + "amazon-nova-micro", + "amazon-nova-pro", + "claude-v3-haiku", + "claude-v3.5-haiku", + "claude-v3.5-sonnet", + "claude-v3.5-sonnet-v2", + "claude-v3.7-sonnet", + ], + }, + "us-west-2": { + "area": "us", + "models": [ + "amazon-nova-lite", + "amazon-nova-micro", + "amazon-nova-pro", + "claude-v3-haiku", + "claude-v3-opus", + "claude-v3-sonnet", + "claude-v3.5-haiku", + "claude-v3.5-sonnet", + "claude-v3.5-sonnet-v2", + "claude-v3.7-sonnet", + ], + }, + "eu-central-1": { + "area": "eu", + "models": [ + "amazon-nova-lite", + "amazon-nova-micro", + "amazon-nova-pro", + "claude-v3-haiku", + "claude-v3-sonnet", + "claude-v3.5-sonnet", + ], + }, + "eu-west-1": { + "area": "eu", + "models": [ + "amazon-nova-lite", + "amazon-nova-micro", + "amazon-nova-pro", + 
"claude-v3-haiku", + "claude-v3-sonnet", + "claude-v3.5-sonnet", + ], + }, + "eu-west-2": {"area": "eu", "models": []}, + "eu-west-3": { + "area": "eu", + "models": [ + "amazon-nova-lite", + "amazon-nova-micro", + "amazon-nova-pro", + "claude-v3-haiku", + "claude-v3-sonnet", + "claude-v3.5-sonnet", + ], + }, + "eu-north-1": { + "area": "eu", + "models": ["amazon-nova-lite", "amazon-nova-micro", "amazon-nova-pro"], + }, + "ap-south-1": { + "area": "apac", + "models": [ + "amazon-nova-lite", + "amazon-nova-micro", + "amazon-nova-pro", + "claude-v3-haiku", + "claude-v3-sonnet", + "claude-v3.5-sonnet", + "claude-v3.5-sonnet-v2", + ], + }, + "ap-northeast-1": { + "area": "apac", + "models": [ + "amazon-nova-lite", + "amazon-nova-micro", + "amazon-nova-pro", + "claude-v3-haiku", + "claude-v3-sonnet", + "claude-v3.5-sonnet", + "claude-v3.5-sonnet-v2", + ], + }, + "ap-northeast-2": { + "area": "apac", + "models": [ + "amazon-nova-lite", + "amazon-nova-micro", + "amazon-nova-pro", + "claude-v3-haiku", + "claude-v3-sonnet", + "claude-v3.5-sonnet", + "claude-v3.5-sonnet-v2", + ], + }, + "ap-northeast-3": {"area": "apac", "models": ["claude-v3.5-sonnet-v2"]}, + "ap-southeast-1": { + "area": "apac", + "models": [ + "amazon-nova-lite", + "amazon-nova-micro", + "amazon-nova-pro", + "claude-v3-haiku", + "claude-v3-sonnet", + "claude-v3.5-sonnet", + "claude-v3.5-sonnet-v2", + ], + }, + "ap-southeast-2": { + "area": "apac", + "models": [ + "amazon-nova-lite", + "amazon-nova-micro", + "amazon-nova-pro", + "claude-v3-haiku", + "claude-v3-sonnet", + "claude-v3.5-sonnet", + "claude-v3.5-sonnet-v2", + ], + }, } base_model_id = base_model_ids.get(model) @@ -352,9 +477,13 @@ def get_model_id( raise ValueError(f"Unsupported model: {model}") model_id = base_model_id - if enable_cross_region and model in cross_region_inference_models: - region_prefix = supported_region_prefixes.get(bedrock_region) - if region_prefix: + + if enable_cross_region: + if ( + bedrock_region in supported_regions + 
"""List the cross-region inference profiles Bedrock supports per region.

Calls the Bedrock ``ListInferenceProfiles`` API in every region listed in
``regions`` and prints, as JSON, which models from ``supported_base_models``
have an ACTIVE system-defined inference profile whose area prefix matches the
region's area.  The printed structure is the source of the
``supported_regions`` table in ``backend/app/bedrock.py``.
"""

import json
import sys

# Bedrock base model ID -> model name used by the application.
supported_base_models = {
    "anthropic.claude-v2:1": "claude-v2",
    "anthropic.claude-instant-v1": "claude-instant-v1",
    "anthropic.claude-3-sonnet-20240229-v1:0": "claude-v3-sonnet",
    "anthropic.claude-3-haiku-20240307-v1:0": "claude-v3-haiku",
    "anthropic.claude-3-opus-20240229-v1:0": "claude-v3-opus",
    "anthropic.claude-3-5-sonnet-20240620-v1:0": "claude-v3.5-sonnet",
    "anthropic.claude-3-5-sonnet-20241022-v2:0": "claude-v3.5-sonnet-v2",
    "anthropic.claude-3-7-sonnet-20250219-v1:0": "claude-v3.7-sonnet",
    "anthropic.claude-3-5-haiku-20241022-v1:0": "claude-v3.5-haiku",
    "mistral.mistral-7b-instruct-v0:2": "mistral-7b-instruct",
    "mistral.mixtral-8x7b-instruct-v0:1": "mixtral-8x7b-instruct",
    "mistral.mistral-large-2402-v1:0": "mistral-large",
    "amazon.nova-pro-v1:0": "amazon-nova-pro",
    "amazon.nova-lite-v1:0": "amazon-nova-lite",
    "amazon.nova-micro-v1:0": "amazon-nova-micro",
}

# Regions to query and the area prefix their inference profile IDs carry.
# "models" stays empty here; main() builds a fresh result instead of filling
# this table in place, so calling it twice cannot duplicate entries.
regions = {
    # US
    "us-east-1": {"area": "us", "models": []},
    "us-east-2": {"area": "us", "models": []},
    "us-west-2": {"area": "us", "models": []},
    # EU
    "eu-central-1": {"area": "eu", "models": []},
    "eu-west-1": {"area": "eu", "models": []},
    "eu-west-2": {"area": "eu", "models": []},
    "eu-west-3": {"area": "eu", "models": []},
    "eu-north-1": {"area": "eu", "models": []},
    # APAC
    "ap-south-1": {"area": "apac", "models": []},
    "ap-northeast-1": {"area": "apac", "models": []},
    "ap-northeast-2": {"area": "apac", "models": []},
    "ap-northeast-3": {"area": "apac", "models": []},
    "ap-southeast-1": {"area": "apac", "models": []},
    "ap-southeast-2": {"area": "apac", "models": []},
}


def split_inference_profiles_id(inference_profile_id):
    """Split an inference profile ID into ``(area, model_id)``.

    The area is everything before the first dot and the model ID is
    everything after it, e.g. ``"us.amazon.nova-pro-v1:0"`` becomes
    ``("us", "amazon.nova-pro-v1:0")``.  An ID without a dot yields an
    empty model ID.
    """
    area, _, model = inference_profile_id.partition(".")
    return area, model


def list_inference_profiles_for_region(region):
    """Return all system-defined inference profile summaries for *region*.

    Follows ``nextToken`` so profiles beyond the first page are included.
    A failing API call is reported on stderr and yields an empty list, so
    one unavailable region does not abort the whole run.
    """
    # Imported here so the pure helpers in this module can be imported
    # (and tested) on machines that do not have the AWS SDK installed.
    import boto3

    client = boto3.Session().client("bedrock", region_name=region)
    request = {"typeEquals": "SYSTEM_DEFINED", "maxResults": 1000}
    profiles = []

    try:
        while True:
            response = client.list_inference_profiles(**request)
            profiles.extend(response.get("inferenceProfileSummaries", []))
            next_token = response.get("nextToken")
            if not next_token:
                return profiles
            request["nextToken"] = next_token
    except Exception as e:
        # Deliberate best-effort boundary.  The message goes to stderr so the
        # JSON printed on stdout stays valid and can be copied as-is.
        print(f"Error in region {region}: {e}", file=sys.stderr)
        return []


def supported_models_for_area(profiles, area):
    """Return the sorted, de-duplicated model names usable in *area*.

    A profile counts only when its status is ``"ACTIVE"``, its ID prefix
    equals *area*, and the remainder of its ID is a key of
    ``supported_base_models``.
    """
    models = set()
    for profile in profiles:
        if profile.get("status") != "ACTIVE":
            continue
        # A missing or null ID becomes "" so it simply fails to match.
        profile_id = profile.get("inferenceProfileId") or ""
        profile_area, model_id = split_inference_profiles_id(profile_id)
        if profile_area == area and model_id in supported_base_models:
            models.add(supported_base_models[model_id])
    return sorted(models)


def main():
    """Query every region in ``regions`` and print the result as JSON."""
    result = {}
    for region, info in regions.items():
        profiles = list_inference_profiles_for_region(region)
        result[region] = {
            "area": info["area"],
            "models": supported_models_for_area(profiles, info["area"]),
        }

    print(json.dumps(result, indent=4))


if __name__ == "__main__":
    main()