Skip to content

Commit

Permalink
feat: Update supported cross region inferences (#756)
Browse files Browse the repository at this point in the history
* Add a script to get supported cross region inferences

* Use available cross region inference more accurately
  • Loading branch information
sztm authored Mar 3, 2025
1 parent 8d2e925 commit 65445a0
Show file tree
Hide file tree
Showing 2 changed files with 244 additions and 22 deletions.
173 changes: 151 additions & 22 deletions backend/app/bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,36 +325,165 @@ def get_model_id(
"amazon-nova-micro": "amazon.nova-micro-v1:0",
}

# Made this list by scripts/cross_region_inference/get_supported_cross_region_inferences.py
# Ref: https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-support.html
cross_region_inference_models = {
"claude-v3-sonnet",
"claude-v3-haiku",
"claude-v3-opus",
"claude-v3.5-sonnet",
"claude-v3.5-sonnet-v2",
"claude-v3.7-sonnet",
"claude-v3.5-haiku",
"amazon-nova-pro",
"amazon-nova-lite",
"amazon-nova-micro",
}

supported_region_prefixes = {
"us-east-1": "us",
"us-west-2": "us",
"eu-west-1": "eu",
"eu-central-1": "eu",
"eu-west-3": "eu",
supported_regions = {
"us-east-1": {
"area": "us",
"models": [
"amazon-nova-lite",
"amazon-nova-micro",
"amazon-nova-pro",
"claude-v3-haiku",
"claude-v3-opus",
"claude-v3-sonnet",
"claude-v3.5-haiku",
"claude-v3.5-sonnet",
"claude-v3.5-sonnet-v2",
"claude-v3.7-sonnet",
],
},
"us-east-2": {
"area": "us",
"models": [
"amazon-nova-lite",
"amazon-nova-micro",
"amazon-nova-pro",
"claude-v3-haiku",
"claude-v3.5-haiku",
"claude-v3.5-sonnet",
"claude-v3.5-sonnet-v2",
"claude-v3.7-sonnet",
],
},
"us-west-2": {
"area": "us",
"models": [
"amazon-nova-lite",
"amazon-nova-micro",
"amazon-nova-pro",
"claude-v3-haiku",
"claude-v3-opus",
"claude-v3-sonnet",
"claude-v3.5-haiku",
"claude-v3.5-sonnet",
"claude-v3.5-sonnet-v2",
"claude-v3.7-sonnet",
],
},
"eu-central-1": {
"area": "eu",
"models": [
"amazon-nova-lite",
"amazon-nova-micro",
"amazon-nova-pro",
"claude-v3-haiku",
"claude-v3-sonnet",
"claude-v3.5-sonnet",
],
},
"eu-west-1": {
"area": "eu",
"models": [
"amazon-nova-lite",
"amazon-nova-micro",
"amazon-nova-pro",
"claude-v3-haiku",
"claude-v3-sonnet",
"claude-v3.5-sonnet",
],
},
"eu-west-2": {"area": "eu", "models": []},
"eu-west-3": {
"area": "eu",
"models": [
"amazon-nova-lite",
"amazon-nova-micro",
"amazon-nova-pro",
"claude-v3-haiku",
"claude-v3-sonnet",
"claude-v3.5-sonnet",
],
},
"eu-north-1": {
"area": "eu",
"models": ["amazon-nova-lite", "amazon-nova-micro", "amazon-nova-pro"],
},
"ap-south-1": {
"area": "apac",
"models": [
"amazon-nova-lite",
"amazon-nova-micro",
"amazon-nova-pro",
"claude-v3-haiku",
"claude-v3-sonnet",
"claude-v3.5-sonnet",
"claude-v3.5-sonnet-v2",
],
},
"ap-northeast-1": {
"area": "apac",
"models": [
"amazon-nova-lite",
"amazon-nova-micro",
"amazon-nova-pro",
"claude-v3-haiku",
"claude-v3-sonnet",
"claude-v3.5-sonnet",
"claude-v3.5-sonnet-v2",
],
},
"ap-northeast-2": {
"area": "apac",
"models": [
"amazon-nova-lite",
"amazon-nova-micro",
"amazon-nova-pro",
"claude-v3-haiku",
"claude-v3-sonnet",
"claude-v3.5-sonnet",
"claude-v3.5-sonnet-v2",
],
},
"ap-northeast-3": {"area": "apac", "models": ["claude-v3.5-sonnet-v2"]},
"ap-southeast-1": {
"area": "apac",
"models": [
"amazon-nova-lite",
"amazon-nova-micro",
"amazon-nova-pro",
"claude-v3-haiku",
"claude-v3-sonnet",
"claude-v3.5-sonnet",
"claude-v3.5-sonnet-v2",
],
},
"ap-southeast-2": {
"area": "apac",
"models": [
"amazon-nova-lite",
"amazon-nova-micro",
"amazon-nova-pro",
"claude-v3-haiku",
"claude-v3-sonnet",
"claude-v3.5-sonnet",
"claude-v3.5-sonnet-v2",
],
},
}

base_model_id = base_model_ids.get(model)
if not base_model_id:
raise ValueError(f"Unsupported model: {model}")

model_id = base_model_id
if enable_cross_region and model in cross_region_inference_models:
region_prefix = supported_region_prefixes.get(bedrock_region)
if region_prefix:

if enable_cross_region:
if (
bedrock_region in supported_regions
and model in supported_regions[bedrock_region]["models"]
):
region_prefix = supported_regions[bedrock_region]["area"]
model_id = f"{region_prefix}.{base_model_id}"
logger.info(
f"Using cross-region model ID: {model_id} for model '{model}' in region '{BEDROCK_REGION}'"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import json
import sys

import boto3

# Bedrock base model IDs recognized by this script, mapped to short model names.
# main() looks up the model part of each inference profile ID in this table and
# ignores profiles whose model ID is not listed here.
supported_base_models = {
    # Anthropic Claude
    "anthropic.claude-v2:1": "claude-v2",
    "anthropic.claude-instant-v1": "claude-instant-v1",
    "anthropic.claude-3-sonnet-20240229-v1:0": "claude-v3-sonnet",
    "anthropic.claude-3-haiku-20240307-v1:0": "claude-v3-haiku",
    "anthropic.claude-3-opus-20240229-v1:0": "claude-v3-opus",
    "anthropic.claude-3-5-sonnet-20240620-v1:0": "claude-v3.5-sonnet",
    "anthropic.claude-3-5-sonnet-20241022-v2:0": "claude-v3.5-sonnet-v2",
    "anthropic.claude-3-7-sonnet-20250219-v1:0": "claude-v3.7-sonnet",
    "anthropic.claude-3-5-haiku-20241022-v1:0": "claude-v3.5-haiku",
    # Mistral
    "mistral.mistral-7b-instruct-v0:2": "mistral-7b-instruct",
    "mistral.mixtral-8x7b-instruct-v0:1": "mixtral-8x7b-instruct",
    "mistral.mistral-large-2402-v1:0": "mistral-large",
    # Amazon Nova
    "amazon.nova-pro-v1:0": "amazon-nova-pro",
    "amazon.nova-lite-v1:0": "amazon-nova-lite",
    "amazon.nova-micro-v1:0": "amazon-nova-micro",
}

# Regions to query, keyed by region name. Each entry records the area prefix
# ("us", "eu" or "apac") the region belongs to and starts with an empty
# "models" list of its own.
regions = {
    region_name: {"area": area_prefix, "models": []}
    for area_prefix, region_names in (
        ("us", ("us-east-1", "us-east-2", "us-west-2")),
        (
            "eu",
            (
                "eu-central-1",
                "eu-west-1",
                "eu-west-2",
                "eu-west-3",
                "eu-north-1",
            ),
        ),
        (
            "apac",
            (
                "ap-south-1",
                "ap-northeast-1",
                "ap-northeast-2",
                "ap-northeast-3",
                "ap-southeast-1",
                "ap-southeast-2",
            ),
        ),
    )
    for region_name in region_names
}


def split_inference_profiles_id(inference_profile_id):
    """Separate an inference profile ID into its area prefix and model ID.

    The text before the first "." is the area and everything after it is the
    model ID, e.g. "us.amazon.nova-pro-v1:0" -> ("us", "amazon.nova-pro-v1:0").
    An ID without any "." yields the whole ID as the area and "" as the model.
    """
    area_prefix, _dot, model_id = inference_profile_id.partition(".")
    return area_prefix, model_id


def list_inference_profiles_for_region(region):
    """Return the system-defined inference profile summaries for a region.

    Calls the Bedrock ``list_inference_profiles`` API in ``region`` and keeps
    requesting pages while the response carries a ``nextToken``, so results
    beyond the first page are not dropped.

    Args:
        region: AWS region name, e.g. "us-east-1".

    Returns:
        A list of inference profile summary dicts. If any API call fails, the
        error is reported on stderr and an empty list is returned.
    """
    session = boto3.Session()
    client = session.client("bedrock", region_name=region)

    request = {"typeEquals": "SYSTEM_DEFINED", "maxResults": 1000}
    summaries = []
    try:
        while True:
            # API call equivalent to ListInferenceProfilesCommand
            response = client.list_inference_profiles(**request)
            summaries.extend(response.get("inferenceProfileSummaries", []))

            next_token = response.get("nextToken")
            if not next_token:
                return summaries
            request["nextToken"] = next_token
    except Exception as e:
        # Deliberately best-effort: a failing region yields no profiles rather
        # than aborting. Report on stderr so the message does not mix into
        # anything the script writes to stdout.
        print(f"Error in region {region}: {e}", file=sys.stderr)
        return []


def main():
    """Collect the supported cross-region inference models per region and print them.

    For every region in ``regions``, list its inference profiles and keep the
    ones that are ACTIVE, whose area prefix equals the region's "area", and
    whose model ID is a key of ``supported_base_models``. The mapped model
    names are appended to the region's "models" list, which is then sorted.
    Finally the filled-in ``regions`` mapping is printed as JSON.
    """
    for region, region_info in regions.items():
        # Call ListInferenceProfilesCommand API in the region
        profiles = list_inference_profiles_for_region(region)

        for profile in profiles:
            if profile.get("status") != "ACTIVE":
                continue

            # Default to "" (not a list): a summary without an ID is skipped
            # instead of raising AttributeError when the ID is split below.
            profile_id = profile.get("inferenceProfileId", "")
            if not profile_id:
                continue

            # Split inference profile
            # For example "us.amazon.nova-pro-v1:0" to ("us", "amazon.nova-pro-v1:0")
            area, model_id = split_inference_profiles_id(profile_id)
            if area != region_info["area"]:
                continue

            # Append only matched model ids
            model = supported_base_models.get(model_id)
            if model is not None:
                region_info["models"].append(model)

        # Sort so the listed order does not depend on the order of the profiles
        region_info["models"].sort()

    print(json.dumps(regions, indent=4))


if __name__ == "__main__":
    main()

0 comments on commit 65445a0

Please sign in to comment.