From 7adccdedb3e26bdc7449566e78026efa5883784d Mon Sep 17 00:00:00 2001
From: Tanvika Boyineni <tanvikab@amazon.com>
Date: Wed, 6 Aug 2025 11:42:00 -0700
Subject: [PATCH 1/4] fix: make JumpStart config selection instance-type aware

---
 src/sagemaker/jumpstart/factory/estimator.py |  1 +
 src/sagemaker/jumpstart/factory/model.py     | 12 ++++++++----
 src/sagemaker/jumpstart/types.py             |  8 ++++----
 src/sagemaker/jumpstart/utils.py             |  9 +++++++--
 4 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/src/sagemaker/jumpstart/factory/estimator.py b/src/sagemaker/jumpstart/factory/estimator.py
index 051cda0f4a..92fdb4d6a1 100644
--- a/src/sagemaker/jumpstart/factory/estimator.py
+++ b/src/sagemaker/jumpstart/factory/estimator.py
@@ -837,6 +837,7 @@ def _add_config_name_to_kwargs(
 
     kwargs.config_name = kwargs.config_name or get_top_ranked_config_name(
         scope=JumpStartScriptScope.TRAINING,
+        instance_type=kwargs.instance_type,
         **get_model_info_default_kwargs(kwargs, include_config_name=False),
     )
 
diff --git a/src/sagemaker/jumpstart/factory/model.py b/src/sagemaker/jumpstart/factory/model.py
index 53ded3f275..c5dab29c16 100644
--- a/src/sagemaker/jumpstart/factory/model.py
+++ b/src/sagemaker/jumpstart/factory/model.py
@@ -559,6 +559,7 @@ def _add_config_name_to_init_kwargs(kwargs: JumpStartModelInitKwargs) -> JumpSta
     kwargs.config_name = kwargs.config_name or get_top_ranked_config_name(
         **get_model_info_default_kwargs(kwargs, include_config_name=False),
         scope=JumpStartScriptScope.INFERENCE,
+        instance_type=kwargs.instance_type,
     )
 
     if kwargs.config_name is None:
@@ -618,6 +619,7 @@ def _add_config_name_to_deploy_kwargs(
         default_config_name = kwargs.config_name or get_top_ranked_config_name(
             **get_model_info_default_kwargs(kwargs, include_config_name=False),
             scope=JumpStartScriptScope.INFERENCE,
+            instance_type=kwargs.instance_type,
         )
 
     kwargs.config_name = kwargs.config_name or default_config_name
@@ -927,6 +929,12 @@ def get_init_kwargs(
 
     model_init_kwargs = _add_vulnerable_and_deprecated_status_to_kwargs(kwargs=model_init_kwargs)
     model_init_kwargs = _add_model_version_to_kwargs(kwargs=model_init_kwargs)
+
+    # Add instance type before config selection so config compatibility can be checked
+    model_init_kwargs = _add_instance_type_to_kwargs(
+        kwargs=model_init_kwargs, disable_instance_type_logging=disable_instance_type_logging
+    )
+
     model_init_kwargs = _add_config_name_to_init_kwargs(kwargs=model_init_kwargs)
 
     model_init_kwargs = _add_sagemaker_session_with_custom_user_agent_to_kwargs(
@@ -936,10 +944,6 @@ def get_init_kwargs(
 
     model_init_kwargs = _add_model_name_to_kwargs(kwargs=model_init_kwargs)
 
-    model_init_kwargs = _add_instance_type_to_kwargs(
-        kwargs=model_init_kwargs, disable_instance_type_logging=disable_instance_type_logging
-    )
-
     model_init_kwargs = _add_image_uri_to_kwargs(kwargs=model_init_kwargs)
 
     if hub_arn:
diff --git a/src/sagemaker/jumpstart/types.py b/src/sagemaker/jumpstart/types.py
index 5b45b21bd8..b0cea6eb02 100644
--- a/src/sagemaker/jumpstart/types.py
+++ b/src/sagemaker/jumpstart/types.py
@@ -1723,10 +1723,10 @@ def get_top_config_from_ranking(
             ranked_config_names = rankings.rankings
         for config_name in ranked_config_names:
             resolved_config = self.configs[config_name].resolved_config
-            if instance_type and instance_type not in getattr(
-                resolved_config, instance_type_attribute
-            ):
-                continue
+            if instance_type:
+                supported_instance_types = getattr(resolved_config, instance_type_attribute, [])
+                if supported_instance_types and instance_type not in supported_instance_types:
+                    continue
             return self.configs[config_name]
 
         return None
diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py
index 15f9e9b52e..b7c22bbda6 100644
--- a/src/sagemaker/jumpstart/utils.py
+++ b/src/sagemaker/jumpstart/utils.py
@@ -1233,9 +1233,14 @@ def get_top_ranked_config_name(
     tolerate_vulnerable_model: bool = False,
     hub_arn: Optional[str] = None,
     ranking_name: enums.JumpStartConfigRankingName = enums.JumpStartConfigRankingName.DEFAULT,
+    instance_type: Optional[str] = None,
 ) -> Optional[str]:
     """Returns the top ranked config name for the given model ID and region.
 
+    Args:
+        instance_type (Optional[str]): The instance type to filter configs by compatibility.
+            If provided, only configs that support this instance type will be considered.
+
     Raises:
         ValueError: If the script scope is not supported by JumpStart.
     """
@@ -1254,7 +1259,7 @@ def get_top_ranked_config_name(
     if scope == enums.JumpStartScriptScope.INFERENCE:
         return (
             model_specs.inference_configs.get_top_config_from_ranking(
-                ranking_name=ranking_name
+                ranking_name=ranking_name, instance_type=instance_type
             ).config_name
             if model_specs.inference_configs
             else None
@@ -1262,7 +1267,7 @@ def get_top_ranked_config_name(
     if scope == enums.JumpStartScriptScope.TRAINING:
         return (
             model_specs.training_configs.get_top_config_from_ranking(
-                ranking_name=ranking_name
+                ranking_name=ranking_name, instance_type=instance_type
             ).config_name
             if model_specs.training_configs
             else None

From d6c280f729a78caa96c6812e10a0c8f08b457fc8 Mon Sep 17 00:00:00 2001
From: Tanvika Boyineni <tanvikab@amazon.com>
Date: Wed, 6 Aug 2025 12:30:08 -0700
Subject: [PATCH 2/4] fix: adding auto resolution config support

---
 specfileex                               | 2960 ++++++++++++++++++++++
 src/sagemaker/jumpstart/factory/model.py |   10 +-
 src/sagemaker/jumpstart/types.py         |    7 +-
 src/sagemaker/jumpstart/utils.py         |    4 -
 test_unified_model_card.py               |  193 ++
 5 files changed, 3163 insertions(+), 11 deletions(-)
 create mode 100644 specfileex
 create mode 100644 test_unified_model_card.py

diff --git a/specfileex b/specfileex
new file mode 100644
index 0000000000..e2d15647d0
--- /dev/null
+++ b/specfileex
@@ -0,0 +1,2960 @@
+{
+    "model_id": "meta-textgeneration-llama-2-7b-f",
+    "provider": "meta",
+    "url": "https://ai.meta.com/resources/models-and-libraries/llama-downloads/",
+    "version": "4.19.0",
+    "min_sdk_version": "2.225.0",
+    "training_supported": true,
+    "incremental_training_supported": true,
+    "hosting_ecr_specs": {
+        "framework": "huggingface-llm",
+        "framework_version": "2.0.0",
+        "py_version": "py310"
+    },
+    "hosting_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/inference/v1.1.0/",
+    "hosting_script_key": "source-directory-tarballs/meta/inference/textgeneration/v1.2.3/sourcedir.tar.gz",
+    "hosting_prepacked_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/inference-prepack/v1.1.0/",
+    "hosting_prepacked_artifact_version": "1.1.0",
+    "hosting_use_script_uri": false,
+    "hosting_eula_key": "fmhMetadata/eula/llamaEula.txt",
+    "inference_vulnerable": false,
+    "inference_dependencies": [],
+    "inference_vulnerabilities": [],
+    "training_vulnerable": false,
+    "training_dependencies": [
+        "accelerate==0.33.0",
+        "bitsandbytes==0.39.1",
+        "black==23.7.0",
+        "brotli==1.0.9",
+        "datasets==2.14.1",
+        "docstring-parser==0.16",
+        "fire==0.5.0",
+        "huggingface-hub==0.24.2",
+        "inflate64==0.3.1",
+        "loralib==0.1.1",
+        "multivolumefile==0.2.3",
+        "mypy-extensions==1.0.0",
+        "nvidia-cublas-cu12==12.1.3.1",
+        "nvidia-cuda-cupti-cu12==12.1.105",
+        "nvidia-cuda-nvrtc-cu12==12.1.105",
+        "nvidia-cuda-runtime-cu12==12.1.105",
+        "nvidia-cudnn-cu12==8.9.2.26",
+        "nvidia-cufft-cu12==11.0.2.54",
+        "nvidia-curand-cu12==10.3.2.106",
+        "nvidia-cusolver-cu12==11.4.5.107",
+        "nvidia-cusolver-cu12==11.4.5.107",
+        "nvidia-cusparse-cu12==12.1.0.106",
+        "nvidia-nccl-cu12==2.19.3",
+        "nvidia-nvjitlink-cu12==12.3.101",
+        "nvidia-nvtx-cu12==12.1.105",
+        "pathspec==0.11.1",
+        "peft==0.4.0",
+        "py7zr==0.20.5",
+        "pybcj==1.0.1",
+        "pycryptodomex==3.18.0",
+        "pyppmd==1.0.0",
+        "pyzstd==0.15.9",
+        "safetensors==0.4.2",
+        "sagemaker_jumpstart_huggingface_script_utilities==1.2.7",
+        "sagemaker_jumpstart_script_utilities==1.1.9",
+        "scipy==1.11.1",
+        "shtab==1.7.1",
+        "termcolor==2.3.0",
+        "texttable==1.6.7",
+        "tokenize-rt==5.1.0",
+        "tokenizers==0.19.1",
+        "torch==2.2.0",
+        "transformers==4.43.1",
+        "triton==2.2.0",
+        "trl==0.8.1",
+        "typing-extensions==4.8.0",
+        "tyro==0.7.3"
+    ],
+    "training_vulnerabilities": [],
+    "deprecated": false,
+    "hyperparameters": [
+        {
+            "name": "int8_quantization",
+            "type": "text",
+            "default": "False",
+            "options": [
+                "True",
+                "False"
+            ],
+            "scope": "algorithm"
+        },
+        {
+            "name": "enable_fsdp",
+            "type": "text",
+            "default": "True",
+            "options": [
+                "True",
+                "False"
+            ],
+            "scope": "algorithm"
+        },
+        {
+            "name": "epoch",
+            "type": "int",
+            "default": 1,
+            "min": 1,
+            "max": 1000,
+            "scope": "algorithm"
+        },
+        {
+            "name": "learning_rate",
+            "type": "float",
+            "default": 0.0001,
+            "min": 1e-08,
+            "max": 1,
+            "scope": "algorithm"
+        },
+        {
+            "name": "lora_r",
+            "type": "int",
+            "default": 8,
+            "min": 1,
+            "scope": "algorithm"
+        },
+        {
+            "name": "lora_alpha",
+            "type": "int",
+            "default": 32,
+            "min": 1,
+            "scope": "algorithm"
+        },
+        {
+            "name": "target_modules",
+            "type": "text",
+            "default": "q_proj,v_proj",
+            "scope": "algorithm"
+        },
+        {
+            "name": "lora_dropout",
+            "type": "float",
+            "default": 0.05,
+            "min": 0,
+            "max": 1,
+            "scope": "algorithm"
+        },
+        {
+            "name": "instruction_tuned",
+            "type": "text",
+            "default": "False",
+            "options": [
+                "True",
+                "False"
+            ],
+            "scope": "algorithm"
+        },
+        {
+            "name": "chat_dataset",
+            "type": "text",
+            "default": "True",
+            "options": [
+                "True",
+                "False"
+            ],
+            "scope": "algorithm"
+        },
+        {
+            "name": "add_input_output_demarcation_key",
+            "type": "text",
+            "default": "True",
+            "options": [
+                "True",
+                "False"
+            ],
+            "scope": "algorithm"
+        },
+        {
+            "name": "per_device_train_batch_size",
+            "type": "int",
+            "default": 1,
+            "min": 1,
+            "max": 1000,
+            "scope": "algorithm"
+        },
+        {
+            "name": "per_device_eval_batch_size",
+            "type": "int",
+            "default": 1,
+            "min": 1,
+            "max": 1000,
+            "scope": "algorithm"
+        },
+        {
+            "name": "max_train_samples",
+            "type": "int",
+            "default": -1,
+            "min": -1,
+            "scope": "algorithm"
+        },
+        {
+            "name": "max_val_samples",
+            "type": "int",
+            "default": -1,
+            "min": -1,
+            "scope": "algorithm"
+        },
+        {
+            "name": "seed",
+            "type": "int",
+            "default": 10,
+            "min": 1,
+            "max": 1000,
+            "scope": "algorithm"
+        },
+        {
+            "name": "max_input_length",
+            "type": "int",
+            "default": -1,
+            "min": -1,
+            "scope": "algorithm"
+        },
+        {
+            "name": "validation_split_ratio",
+            "type": "float",
+            "default": 0.2,
+            "min": 0,
+            "max": 1,
+            "scope": "algorithm"
+        },
+        {
+            "name": "train_data_split_seed",
+            "type": "int",
+            "default": 0,
+            "min": 0,
+            "scope": "algorithm"
+        },
+        {
+            "name": "preprocessing_num_workers",
+            "type": "text",
+            "default": "None",
+            "scope": "algorithm"
+        },
+        {
+            "name": "sagemaker_submit_directory",
+            "type": "text",
+            "default": "/opt/ml/input/data/code/sourcedir.tar.gz",
+            "scope": "container"
+        },
+        {
+            "name": "sagemaker_program",
+            "type": "text",
+            "default": "transfer_learning.py",
+            "scope": "container"
+        },
+        {
+            "name": "sagemaker_container_log_level",
+            "type": "text",
+            "default": "20",
+            "scope": "container"
+        }
+    ],
+    "training_script_key": "source-directory-tarballs/training/meta-textgeneration/v1.2.0/sourcedir.tar.gz",
+    "training_prepacked_script_key": "source-directory-tarballs/training/meta-textgeneration/prepack/inference-meta-textgeneration/v1.2.0/sourcedir.tar.gz",
+    "training_prepacked_script_version": "1.2.0",
+    "training_ecr_specs": {
+        "framework": "huggingface",
+        "framework_version": "2.0.0",
+        "py_version": "py310",
+        "huggingface_transformers_version": "4.28.1"
+    },
+    "training_artifact_key": "meta-training/v1.1.0/train-meta-textgeneration-llama-2-7b-f.tar.gz",
+    "inference_environment_variables": [
+        {
+            "name": "SAGEMAKER_PROGRAM",
+            "type": "text",
+            "default": "inference.py",
+            "scope": "container",
+            "required_for_model_class": true
+        },
+        {
+            "name": "SAGEMAKER_SUBMIT_DIRECTORY",
+            "type": "text",
+            "default": "/opt/ml/model/code",
+            "scope": "container",
+            "required_for_model_class": false
+        },
+        {
+            "name": "SAGEMAKER_CONTAINER_LOG_LEVEL",
+            "type": "text",
+            "default": "20",
+            "scope": "container",
+            "required_for_model_class": false
+        },
+        {
+            "name": "SAGEMAKER_MODEL_SERVER_TIMEOUT",
+            "type": "text",
+            "default": "3600",
+            "scope": "container",
+            "required_for_model_class": true
+        },
+        {
+            "name": "ENDPOINT_SERVER_TIMEOUT",
+            "type": "int",
+            "default": 3600,
+            "scope": "container",
+            "required_for_model_class": true
+        },
+        {
+            "name": "MODEL_CACHE_ROOT",
+            "type": "text",
+            "default": "/opt/ml/model",
+            "scope": "container",
+            "required_for_model_class": true
+        },
+        {
+            "name": "SAGEMAKER_ENV",
+            "type": "text",
+            "default": "1",
+            "scope": "container",
+            "required_for_model_class": true
+        },
+        {
+            "name": "HF_MODEL_ID",
+            "type": "text",
+            "default": "/opt/ml/model",
+            "scope": "container",
+            "required_for_model_class": true
+        },
+        {
+            "name": "OPTION_GPU_MEMORY_UTILIZATION",
+            "type": "text",
+            "default": "0.85",
+            "scope": "container",
+            "required_for_model_class": true
+        },
+        {
+            "name": "SM_NUM_GPUS",
+            "type": "text",
+            "default": "1",
+            "scope": "container",
+            "required_for_model_class": true
+        },
+        {
+            "name": "MAX_INPUT_LENGTH",
+            "type": "text",
+            "default": "4095",
+            "scope": "container",
+            "required_for_model_class": true
+        },
+        {
+            "name": "MAX_TOTAL_TOKENS",
+            "type": "text",
+            "default": "4096",
+            "scope": "container",
+            "required_for_model_class": true
+        },
+        {
+            "name": "MAX_BATCH_PREFILL_TOKENS",
+            "type": "text",
+            "default": "8192",
+            "scope": "container",
+            "required_for_model_class": true
+        },
+        {
+            "name": "MAX_CONCURRENT_REQUESTS",
+            "type": "text",
+            "default": "512",
+            "scope": "container",
+            "required_for_model_class": true
+        },
+        {
+            "name": "SAGEMAKER_MODEL_SERVER_WORKERS",
+            "type": "int",
+            "default": 1,
+            "scope": "container",
+            "required_for_model_class": true
+        }
+    ],
+    "metrics": [
+        {
+            "Name": "huggingface-textgeneration:eval-loss",
+            "Regex": "eval_epoch_loss=tensor\\(([0-9\\.]+)"
+        },
+        {
+            "Name": "huggingface-textgeneration:eval-ppl",
+            "Regex": "eval_ppl=tensor\\(([0-9\\.]+)"
+        },
+        {
+            "Name": "huggingface-textgeneration:train-loss",
+            "Regex": "train_epoch_loss=([0-9\\.]+)"
+        }
+    ],
+    "default_inference_instance_type": "ml.g5.12xlarge",
+    "supported_inference_instance_types": [
+        "ml.g5.12xlarge",
+        "ml.g5.24xlarge",
+        "ml.g5.2xlarge",
+        "ml.g5.48xlarge",
+        "ml.g5.4xlarge",
+        "ml.g5.8xlarge",
+        "ml.g6.12xlarge",
+        "ml.p4d.24xlarge"
+    ],
+    "default_training_instance_type": "ml.g5.12xlarge",
+    "supported_training_instance_types": [
+        "ml.g5.12xlarge",
+        "ml.g5.24xlarge",
+        "ml.g5.48xlarge",
+        "ml.p3dn.24xlarge",
+        "ml.g4dn.12xlarge"
+    ],
+    "model_kwargs": {},
+    "estimator_kwargs": {
+        "encrypt_inter_container_traffic": true,
+        "disable_output_compression": true,
+        "max_run": 360000
+    },
+    "fit_kwargs": {},
+    "inference_volume_size": 256,
+    "training_volume_size": 256,
+    "inference_enable_network_isolation": true,
+    "training_enable_network_isolation": true,
+    "default_training_dataset_key": "training-datasets/oasst_top/train/",
+    "validation_supported": true,
+    "fine_tuning_supported": true,
+    "resource_name_base": "meta-textgeneration-llama-2-7b-f",
+    "gated_bucket": true,
+    "training_instance_type_variants": {
+        "regional_aliases": {
+            "af-south-1": {
+                "gpu_ecr_uri_1": "626614931356.dkr.ecr.af-south-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "ap-east-1": {
+                "gpu_ecr_uri_1": "871362719292.dkr.ecr.ap-east-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "ap-northeast-1": {
+                "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "ap-northeast-2": {
+                "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "ap-northeast-3": {
+                "gpu_ecr_uri_1": "364406365360.dkr.ecr.ap-northeast-3.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "ap-south-1": {
+                "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-south-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "ap-southeast-1": {
+                "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "ap-southeast-2": {
+                "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "ap-southeast-3": {
+                "gpu_ecr_uri_1": "907027046896.dkr.ecr.ap-southeast-3.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "ca-central-1": {
+                "gpu_ecr_uri_1": "763104351884.dkr.ecr.ca-central-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "ca-west-1": {
+                "gpu_ecr_uri_1": "204538143572.dkr.ecr.ca-west-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "cn-north-1": {
+                "gpu_ecr_uri_1": "727897471807.dkr.ecr.cn-north-1.amazonaws.com.cn/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "cn-northwest-1": {
+                "gpu_ecr_uri_1": "727897471807.dkr.ecr.cn-northwest-1.amazonaws.com.cn/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "eu-central-1": {
+                "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-central-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "eu-north-1": {
+                "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-north-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "eu-south-1": {
+                "gpu_ecr_uri_1": "692866216735.dkr.ecr.eu-south-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "eu-west-1": {
+                "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-west-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "eu-west-2": {
+                "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "eu-west-3": {
+                "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-west-3.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "il-central-1": {
+                "gpu_ecr_uri_1": "780543022126.dkr.ecr.il-central-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "me-central-1": {
+                "gpu_ecr_uri_1": "914824155844.dkr.ecr.me-central-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "me-south-1": {
+                "gpu_ecr_uri_1": "217643126080.dkr.ecr.me-south-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "sa-east-1": {
+                "gpu_ecr_uri_1": "763104351884.dkr.ecr.sa-east-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "us-east-1": {
+                "gpu_ecr_uri_1": "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "us-east-2": {
+                "gpu_ecr_uri_1": "763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "us-gov-east-1": {
+                "gpu_ecr_uri_1": "446045086412.dkr.ecr.us-gov-east-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "us-gov-west-1": {
+                "gpu_ecr_uri_1": "442386744353.dkr.ecr.us-gov-west-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "us-west-1": {
+                "gpu_ecr_uri_1": "763104351884.dkr.ecr.us-west-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            },
+            "us-west-2": {
+                "gpu_ecr_uri_1": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
+            }
+        },
+        "variants": {
+            "g4dn": {
+                "regional_properties": {
+                    "image_uri": "$gpu_ecr_uri_1"
+                },
+                "properties": {
+                    "gated_model_key_env_var_value": "meta-training/g4dn/v1.0.0/train-meta-textgeneration-llama-2-7b-f.tar.gz"
+                }
+            },
+            "g5": {
+                "regional_properties": {
+                    "image_uri": "$gpu_ecr_uri_1"
+                },
+                "properties": {
+                    "gated_model_key_env_var_value": "meta-training/g5/v1.0.0/train-meta-textgeneration-llama-2-7b-f.tar.gz"
+                }
+            },
+            "g6": {
+                "regional_properties": {
+                    "image_uri": "$gpu_ecr_uri_1"
+                }
+            },
+            "g6e": {
+                "regional_properties": {
+                    "image_uri": "$gpu_ecr_uri_1"
+                }
+            },
+            "local_gpu": {
+                "regional_properties": {
+                    "image_uri": "$gpu_ecr_uri_1"
+                }
+            },
+            "p2": {
+                "regional_properties": {
+                    "image_uri": "$gpu_ecr_uri_1"
+                }
+            },
+            "p3": {
+                "regional_properties": {
+                    "image_uri": "$gpu_ecr_uri_1"
+                }
+            },
+            "p3dn": {
+                "regional_properties": {
+                    "image_uri": "$gpu_ecr_uri_1"
+                },
+                "properties": {
+                    "gated_model_key_env_var_value": "meta-training/p3dn/v1.0.0/train-meta-textgeneration-llama-2-7b-f.tar.gz"
+                }
+            },
+            "p4d": {
+                "regional_properties": {
+                    "image_uri": "$gpu_ecr_uri_1"
+                }
+            },
+            "p4de": {
+                "regional_properties": {
+                    "image_uri": "$gpu_ecr_uri_1"
+                }
+            },
+            "p5": {
+                "regional_properties": {
+                    "image_uri": "$gpu_ecr_uri_1"
+                }
+            },
+            "p5e": {
+                "regional_properties": {
+                    "image_uri": "$gpu_ecr_uri_1"
+                }
+            },
+            "p5en": {
+                "regional_properties": {
+                    "image_uri": "$gpu_ecr_uri_1"
+                }
+            },
+            "p6": {
+                "regional_properties": {
+                    "image_uri": "$gpu_ecr_uri_1"
+                }
+            },
+            "p6e": {
+                "regional_properties": {
+                    "image_uri": "$gpu_ecr_uri_1"
+                }
+            }
+        }
+    },
+    "hosting_artifact_s3_data_type": "S3Prefix",
+    "hosting_artifact_compression_type": "None",
+    "dynamic_container_deployment_supported": true,
+    "inference_configs": {
+        "tgi": {
+            "component_names": [
+                "tgi"
+            ]
+        },
+        "lmi": {
+            "component_names": [
+                "lmi"
+            ],
+            "benchmark_metrics": {
+                "ml.g6.12xlarge": [
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.19",
+                        "concurrency": "16"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "19.7",
+                        "concurrency": "16"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.22",
+                        "concurrency": "32"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "11.6",
+                        "concurrency": "32"
+                    }
+                ],
+                "ml.p4d.24xlarge": [
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "2.58",
+                        "concurrency": "256"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "3448.3",
+                        "concurrency": "256"
+                    }
+                ]
+            }
+        },
+        "lmi-optimized": {
+            "component_names": [
+                "lmi-optimized"
+            ],
+            "benchmark_metrics": {
+                "ml.g5.12xlarge": [
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.23",
+                        "concurrency": "1"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "156.2",
+                        "concurrency": "1"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.25",
+                        "concurrency": "2"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "93.1",
+                        "concurrency": "2"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.27",
+                        "concurrency": "4"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "58.2",
+                        "concurrency": "4"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.29",
+                        "concurrency": "8"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "31.0",
+                        "concurrency": "8"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.42",
+                        "concurrency": "16"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "15.2",
+                        "concurrency": "16"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.58",
+                        "concurrency": "32"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "8.0",
+                        "concurrency": "32"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "2.42",
+                        "concurrency": "128"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "4.6",
+                        "concurrency": "128"
+                    }
+                ],
+                "ml.g5.2xlarge": [
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.19",
+                        "concurrency": "1"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "66.9",
+                        "concurrency": "1"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.19",
+                        "concurrency": "2"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "55.5",
+                        "concurrency": "2"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.22",
+                        "concurrency": "4"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "41.8",
+                        "concurrency": "4"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.44",
+                        "concurrency": "8"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "31.3",
+                        "concurrency": "8"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "2.87",
+                        "concurrency": "16"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "71.1",
+                        "concurrency": "16"
+                    }
+                ],
+                "ml.g6.12xlarge": [
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.16",
+                        "concurrency": "1"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "107.1",
+                        "concurrency": "1"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.17",
+                        "concurrency": "2"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "79.5",
+                        "concurrency": "2"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.19",
+                        "concurrency": "4"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "55.1",
+                        "concurrency": "4"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.21",
+                        "concurrency": "8"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "34.4",
+                        "concurrency": "8"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "3.75",
+                        "concurrency": "64"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "8.3",
+                        "concurrency": "64"
+                    }
+                ],
+                "ml.g6.2xlarge": [
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.23",
+                        "concurrency": "1"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "38.2",
+                        "concurrency": "1"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.30",
+                        "concurrency": "2"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "32.9",
+                        "concurrency": "2"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.30",
+                        "concurrency": "4"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "24.5",
+                        "concurrency": "4"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.60",
+                        "concurrency": "8"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "21.0",
+                        "concurrency": "8"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "4.19",
+                        "concurrency": "16"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "50.0",
+                        "concurrency": "16"
+                    }
+                ],
+                "ml.p4d.24xlarge": [
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.06",
+                        "concurrency": "1"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "150.2",
+                        "concurrency": "1"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.06",
+                        "concurrency": "2"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "149.0",
+                        "concurrency": "2"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.06",
+                        "concurrency": "4"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "149.0",
+                        "concurrency": "4"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.06",
+                        "concurrency": "8"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "141.0",
+                        "concurrency": "8"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.06",
+                        "concurrency": "16"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "128.9",
+                        "concurrency": "16"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.06",
+                        "concurrency": "32"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "105.2",
+                        "concurrency": "32"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.07",
+                        "concurrency": "64"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "73.9",
+                        "concurrency": "64"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.37",
+                        "concurrency": "128"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "68.4",
+                        "concurrency": "128"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "4.58",
+                        "concurrency": "512"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "11111.1",
+                        "concurrency": "512"
+                    }
+                ],
+                "ml.p5.48xlarge": [
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.04",
+                        "concurrency": "1"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "253.2",
+                        "concurrency": "1"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.03",
+                        "concurrency": "2"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "257.1",
+                        "concurrency": "2"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.03",
+                        "concurrency": "4"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "252.5",
+                        "concurrency": "4"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.03",
+                        "concurrency": "8"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "236.4",
+                        "concurrency": "8"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.04",
+                        "concurrency": "16"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "213.2",
+                        "concurrency": "16"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.04",
+                        "concurrency": "32"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "178.6",
+                        "concurrency": "32"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.04",
+                        "concurrency": "64"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "129.0",
+                        "concurrency": "64"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.04",
+                        "concurrency": "128"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "81.2",
+                        "concurrency": "128"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "0.33",
+                        "concurrency": "256"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "74.5",
+                        "concurrency": "256"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "1.77",
+                        "concurrency": "512"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "724.6",
+                        "concurrency": "512"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "2.96",
+                        "concurrency": "768"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "6666.7",
+                        "concurrency": "768"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "2.22",
+                        "concurrency": "1024"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "5882.4",
+                        "concurrency": "1024"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "3.88",
+                        "concurrency": "1280"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "11111.1",
+                        "concurrency": "1280"
+                    },
+                    {
+                        "name": "latency",
+                        "unit": "sec",
+                        "value": "3.99",
+                        "concurrency": "1536"
+                    },
+                    {
+                        "name": "throughput",
+                        "unit": "tokens/sec",
+                        "value": "11111.1",
+                        "concurrency": "1536"
+                    }
+                ]
+            },
+            "acceleration_configs": [
+                {
+                    "type": "Compilation",
+                    "enabled": false
+                },
+                {
+                    "type": "Speculative-Decoding",
+                    "enabled": true
+                },
+                {
+                    "type": "Quantization",
+                    "enabled": false
+                }
+            ]
+        },
+        "neuron": {
+            "component_names": [
+                "neuron"
+            ]
+        }
+    },
+    "inference_config_components": {
+        "tgi": {
+            "hosting_ecr_specs": {
+                "framework": "huggingface-llm",
+                "framework_version": "2.0.0",
+                "py_version": "py310"
+            },
+            "hosting_script_key": "source-directory-tarballs/meta/inference/textgeneration/v1.2.3/sourcedir.tar.gz",
+            "hosting_use_script_uri": false,
+            "inference_dependencies": [],
+            "inference_vulnerable": false,
+            "inference_vulnerabilities": [],
+            "hosting_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/inference/v1.1.0/",
+            "hosting_prepacked_artifact_version": "1.1.0",
+            "hosting_prepacked_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/inference-prepack/v1.1.0/",
+            "hosting_artifact_s3_data_type": "S3Prefix",
+            "hosting_artifact_compression_type": "None",
+            "hosting_neuron_model_id": "meta-textgenerationneuron-llama-2-7b-f",
+            "hosting_neuron_model_version": "1.0.0",
+            "model_kwargs": {},
+            "deploy_kwargs": {
+                "model_data_download_timeout": 1200,
+                "container_startup_health_check_timeout": 1200
+            },
+            "predictor_specs": {
+                "supported_content_types": [
+                    "application/json"
+                ],
+                "supported_accept_types": [
+                    "application/json"
+                ],
+                "default_content_type": "application/json",
+                "default_accept_type": "application/json"
+            },
+            "default_inference_instance_type": "ml.g5.12xlarge",
+            "supported_inference_instance_types": [
+                "ml.g5.12xlarge",
+                "ml.g5.24xlarge",
+                "ml.g5.2xlarge",
+                "ml.g5.48xlarge",
+                "ml.g5.4xlarge",
+                "ml.g5.8xlarge",
+                "ml.g6.12xlarge",
+                "ml.p4d.24xlarge"
+            ],
+            "hosting_instance_type_variants": {
+                "regional_aliases": {
+                    "af-south-1": {
+                        "gpu_ecr_uri_1": "626614931356.dkr.ecr.af-south-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "ap-east-1": {
+                        "gpu_ecr_uri_1": "871362719292.dkr.ecr.ap-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "ap-east-2": {
+                        "gpu_ecr_uri_1": "975050140332.dkr.ecr.ap-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "ap-northeast-1": {
+                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "ap-northeast-2": {
+                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "ap-northeast-3": {
+                        "gpu_ecr_uri_1": "364406365360.dkr.ecr.ap-northeast-3.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "ap-south-1": {
+                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-south-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "ap-south-2": {
+                        "gpu_ecr_uri_1": "772153158452.dkr.ecr.ap-south-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "ap-southeast-1": {
+                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "ap-southeast-2": {
+                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "ap-southeast-3": {
+                        "gpu_ecr_uri_1": "907027046896.dkr.ecr.ap-southeast-3.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "ap-southeast-4": {
+                        "gpu_ecr_uri_1": "457447274322.dkr.ecr.ap-southeast-4.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "ap-southeast-5": {
+                        "gpu_ecr_uri_1": "550225433462.dkr.ecr.ap-southeast-5.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "ap-southeast-7": {
+                        "gpu_ecr_uri_1": "590183813437.dkr.ecr.ap-southeast-7.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "ca-central-1": {
+                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.ca-central-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "ca-west-1": {
+                        "gpu_ecr_uri_1": "204538143572.dkr.ecr.ca-west-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "cn-north-1": {
+                        "gpu_ecr_uri_1": "727897471807.dkr.ecr.cn-north-1.amazonaws.com.cn/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "cn-northwest-1": {
+                        "gpu_ecr_uri_1": "727897471807.dkr.ecr.cn-northwest-1.amazonaws.com.cn/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "eu-central-1": {
+                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-central-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "eu-central-2": {
+                        "gpu_ecr_uri_1": "380420809688.dkr.ecr.eu-central-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "eu-north-1": {
+                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-north-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "eu-south-1": {
+                        "gpu_ecr_uri_1": "692866216735.dkr.ecr.eu-south-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "eu-south-2": {
+                        "gpu_ecr_uri_1": "503227376785.dkr.ecr.eu-south-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "eu-west-1": {
+                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-west-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "eu-west-2": {
+                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "eu-west-3": {
+                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-west-3.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "il-central-1": {
+                        "gpu_ecr_uri_1": "780543022126.dkr.ecr.il-central-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "me-central-1": {
+                        "gpu_ecr_uri_1": "914824155844.dkr.ecr.me-central-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "me-south-1": {
+                        "gpu_ecr_uri_1": "217643126080.dkr.ecr.me-south-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "mx-central-1": {
+                        "gpu_ecr_uri_1": "637423239942.dkr.ecr.mx-central-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "sa-east-1": {
+                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.sa-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "us-east-1": {
+                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "us-east-2": {
+                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "us-gov-east-1": {
+                        "gpu_ecr_uri_1": "446045086412.dkr.ecr.us-gov-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "us-gov-west-1": {
+                        "gpu_ecr_uri_1": "442386744353.dkr.ecr.us-gov-west-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "us-west-1": {
+                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.us-west-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    },
+                    "us-west-2": {
+                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
+                    }
+                },
+                "variants": {
+                    "g4dn": {
+                        "regional_properties": {
+                            "image_uri": "$gpu_ecr_uri_1"
+                        }
+                    },
+                    "g5": {
+                        "regional_properties": {
+                            "image_uri": "$gpu_ecr_uri_1"
+                        }
+                    },
+                    "g6": {
+                        "regional_properties": {
+                            "image_uri": "$gpu_ecr_uri_1"
+                        }
+                    },
+                    "g6e": {
+                        "regional_properties": {
+                            "image_uri": "$gpu_ecr_uri_1"
+                        }
+                    },
+                    "local_gpu": {
+                        "regional_properties": {
+                            "image_uri": "$gpu_ecr_uri_1"
+                        }
+                    },
+                    "p2": {
+                        "regional_properties": {
+                            "image_uri": "$gpu_ecr_uri_1"
+                        }
+                    },
+                    "p3": {
+                        "regional_properties": {
+                            "image_uri": "$gpu_ecr_uri_1"
+                        }
+                    },
+                    "p3dn": {
+                        "regional_properties": {
+                            "image_uri": "$gpu_ecr_uri_1"
+                        }
+                    },
+                    "p4d": {
+                        "regional_properties": {
+                            "image_uri": "$gpu_ecr_uri_1"
+                        }
+                    },
+                    "p4de": {
+                        "regional_properties": {
+                            "image_uri": "$gpu_ecr_uri_1"
+                        }
+                    },
+                    "p5": {
+                        "regional_properties": {
+                            "image_uri": "$gpu_ecr_uri_1"
+                        }
+                    },
+                    "p5e": {
+                        "regional_properties": {
+                            "image_uri": "$gpu_ecr_uri_1"
+                        }
+                    },
+                    "p5en": {
+                        "regional_properties": {
+                            "image_uri": "$gpu_ecr_uri_1"
+                        }
+                    },
+                    "p6": {
+                        "regional_properties": {
+                            "image_uri": "$gpu_ecr_uri_1"
+                        }
+                    },
+                    "p6e": {
+                        "regional_properties": {
+                            "image_uri": "$gpu_ecr_uri_1"
+                        }
+                    },
+                    "ml.g5.12xlarge": {
+                        "properties": {
+                            "environment_variables": {
+                                "SM_NUM_GPUS": "4",
+                                "MAX_BATCH_PREFILL_TOKENS": "16384"
+                            },
+                            "resource_requirements": {
+                                "min_memory_mb": 98304,
+                                "num_accelerators": 4
+                            }
+                        }
+                    },
+                    "ml.g5.24xlarge": {
+                        "properties": {
+                            "environment_variables": {
+                                "SM_NUM_GPUS": "4"
+                            },
+                            "resource_requirements": {
+                                "min_memory_mb": 196608,
+                                "num_accelerators": 4
+                            }
+                        }
+                    },
+                    "ml.g5.48xlarge": {
+                        "properties": {
+                            "environment_variables": {
+                                "SM_NUM_GPUS": "8"
+                            },
+                            "resource_requirements": {
+                                "min_memory_mb": 393216,
+                                "num_accelerators": 8
+                            }
+                        }
+                    },
+                    "ml.p4d.24xlarge": {
+                        "properties": {
+                            "environment_variables": {
+                                "SM_NUM_GPUS": "8",
+                                "MAX_BATCH_PREFILL_TOKENS": "16384"
+                            },
+                            "resource_requirements": {
+                                "min_memory_mb": 589824,
+                                "num_accelerators": 8
+                            }
+                        }
+                    },
+                    "ml.p5.48xlarge": {
+                        "properties": {
+                            "environment_variables": {
+                                "OPTION_GPU_MEMORY_UTILIZATION": "0.95"
+                            }
+                        }
+                    },
+                    "ml.g5.2xlarge": {
+                        "properties": {
+                            "resource_requirements": {
+                                "min_memory_mb": 16384,
+                                "num_accelerators": 1
+                            }
+                        }
+                    },
+                    "ml.g5.4xlarge": {
+                        "properties": {
+                            "resource_requirements": {
+                                "min_memory_mb": 32768,
+                                "num_accelerators": 1
+                            }
+                        }
+                    },
+                    "ml.g5.8xlarge": {
+                        "properties": {
+                            "resource_requirements": {
+                                "min_memory_mb": 65536,
+                                "num_accelerators": 1
+                            }
+                        }
+                    }
+                }
+            },
+            "inference_volume_size": 256,
+            "inference_enable_network_isolation": true,
+            "hosting_resource_requirements": {
+                "min_memory_mb": 98304,
+                "num_accelerators": 4
+            },
+            "inference_environment_variables": [
+                {
+                    "name": "SAGEMAKER_PROGRAM",
+                    "type": "text",
+                    "default": "inference.py",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "SAGEMAKER_SUBMIT_DIRECTORY",
+                    "type": "text",
+                    "default": "/opt/ml/model/code",
+                    "scope": "container",
+                    "required_for_model_class": false
+                },
+                {
+                    "name": "SAGEMAKER_CONTAINER_LOG_LEVEL",
+                    "type": "text",
+                    "default": "20",
+                    "scope": "container",
+                    "required_for_model_class": false
+                },
+                {
+                    "name": "SAGEMAKER_MODEL_SERVER_TIMEOUT",
+                    "type": "text",
+                    "default": "3600",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "ENDPOINT_SERVER_TIMEOUT",
+                    "type": "int",
+                    "default": 3600,
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "MODEL_CACHE_ROOT",
+                    "type": "text",
+                    "default": "/opt/ml/model",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "SAGEMAKER_ENV",
+                    "type": "text",
+                    "default": "1",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "HF_MODEL_ID",
+                    "type": "text",
+                    "default": "/opt/ml/model",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "OPTION_GPU_MEMORY_UTILIZATION",
+                    "type": "text",
+                    "default": "0.85",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "SM_NUM_GPUS",
+                    "type": "text",
+                    "default": "1",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "MAX_INPUT_LENGTH",
+                    "type": "text",
+                    "default": "4095",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "MAX_TOTAL_TOKENS",
+                    "type": "text",
+                    "default": "4096",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "MAX_BATCH_PREFILL_TOKENS",
+                    "type": "text",
+                    "default": "8192",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "MAX_CONCURRENT_REQUESTS",
+                    "type": "text",
+                    "default": "512",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "SAGEMAKER_MODEL_SERVER_WORKERS",
+                    "type": "int",
+                    "default": 1,
+                    "scope": "container",
+                    "required_for_model_class": true
+                }
+            ],
+            "default_payloads": {
+                "pingExponentialBackoff": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "[0].generated_text",
+                        "input_logprobs": "[0].details.prefill[*].logprob"
+                    },
+                    "body": {
+                        "inputs": "import socket\n\ndef ping_exponential_backoff(host: str):",
+                        "parameters": {
+                            "max_new_tokens": 256,
+                            "top_p": 0.9,
+                            "temperature": 0.2,
+                            "decoder_input_details": true,
+                            "details": true
+                        }
+                    }
+                },
+                "argparse": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "[0].generated_text"
+                    },
+                    "body": {
+                        "inputs": "import argparse\n\ndef main(string: str):\n    print(string)\n    print(string[::-1])\n\nif __name__ == \"__main__\":",
+                        "parameters": {
+                            "max_new_tokens": 256,
+                            "top_p": 0.9,
+                            "temperature": 0.05
+                        }
+                    }
+                },
+                "Fibonacci": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "[0].generated_text",
+                        "input_logprobs": "[0].details.prefill[*].logprob"
+                    },
+                    "body": {
+                        "inputs": "def fib(n):\n",
+                        "parameters": {
+                            "max_new_tokens": 64,
+                            "top_p": 0.9,
+                            "temperature": 0.2,
+                            "decoder_input_details": true,
+                            "details": true
+                        }
+                    }
+                },
+                "removeNonAscii": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "[0].generated_text",
+                        "input_logprobs": "[0].details.prefill[*].logprob"
+                    },
+                    "body": {
+                        "inputs": "def remove_non_ascii(s: str) -> str:\n    \"\"\"<FILL>\n    return result\n",
+                        "parameters": {
+                            "max_new_tokens": 256,
+                            "top_p": 0.9,
+                            "temperature": 0.05,
+                            "decoder_input_details": true,
+                            "details": true
+                        }
+                    }
+                },
+                "installationInstructions": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "[0].generated_text"
+                    },
+                    "body": {
+                        "inputs": "# Installation instructions:\n    ```bash\n<FILL>\n    ```\nThis downloads the LLaMA inference code and installs the repository as a local pip package.\n",
+                        "parameters": {
+                            "max_new_tokens": 256,
+                            "top_p": 0.9,
+                            "temperature": 0.05
+                        }
+                    }
+                },
+                "interfaceManager": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "[0].generated_text"
+                    },
+                    "body": {
+                        "inputs": "class InterfaceManagerFactory(AbstractManagerFactory):\n    def __init__(<FILL>\ndef main():\n    factory = InterfaceManagerFactory(start=datetime.now())\n    managers = []\n    for i in range(10):\n        managers.append(factory.build(id=i))\n",
+                        "parameters": {
+                            "max_new_tokens": 256,
+                            "top_p": 0.9,
+                            "temperature": 0.05
+                        }
+                    }
+                },
+                "quasiPrefunctoid": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "[0].generated_text"
+                    },
+                    "body": {
+                        "inputs": "/-- A quasi-prefunctoid is 1-connected iff all its etalisations are 1-connected. -/\ntheorem connected_iff_etalisation [C D : precategoroid] (P : quasi_prefunctoid C D) :\n  π₁ P = 0 ↔ <FILL> = 0 :=\nbegin\n  split,\n  { intros h f,\n    rw pi_1_etalisation at h,\n    simp [h],\n    refl\n  },\n  { intro h,\n    have := @quasi_adjoint C D P,\n    simp [←pi_1_etalisation, this, h],\n    refl\n  }\nend\n",
+                        "parameters": {
+                            "max_new_tokens": 256,
+                            "top_p": 0.9,
+                            "temperature": 0.05
+                        }
+                    }
+                },
+                "bashListTextFiles": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "[0].generated_text",
+                        "input_logprobs": "[0].details.prefill[*].logprob"
+                    },
+                    "body": {
+                        "inputs": "<s>[INST] In Bash, how do I list all text files in the current directory (excluding subdirectories) that have been modified in the last month? [/INST] ",
+                        "parameters": {
+                            "max_new_tokens": 256,
+                            "top_p": 0.9,
+                            "temperature": 0.05,
+                            "decoder_input_details": true,
+                            "details": true
+                        }
+                    }
+                },
+                "inorderPreorderTraversal": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "[0].generated_text"
+                    },
+                    "body": {
+                        "inputs": "<s>[INST] What is the difference between inorder and preorder traversal? Give an example in Python. [/INST] ",
+                        "parameters": {
+                            "max_new_tokens": 256,
+                            "top_p": 0.9,
+                            "temperature": 0.05
+                        }
+                    }
+                },
+                "contiguousSublists": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "[0].generated_text"
+                    },
+                    "body": {
+                        "inputs": "<s>[INST] <<SYS>>\nProvide answers in JavaScript\n<</SYS>>\n\nWrite a function that computes the set of sums of all contiguous sublists of a given list. [/INST] ",
+                        "parameters": {
+                            "max_new_tokens": 256,
+                            "top_p": 0.9,
+                            "temperature": 0.05
+                        }
+                    }
+                }
+            }
+        },
+        "lmi": {
+            "hosting_ecr_specs": {
+                "framework": "djl-deepspeed",
+                "framework_version": "0.27.0",
+                "py_version": "py310"
+            },
+            "hosting_script_key": "source-directory-tarballs/meta/inference/textgeneration/v1.2.3/sourcedir.tar.gz",
+            "hosting_use_script_uri": false,
+            "inference_dependencies": [],
+            "inference_vulnerable": false,
+            "inference_vulnerabilities": [],
+            "hosting_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/inference/v1.1.0/",
+            "hosting_prepacked_artifact_version": "1.1.0",
+            "hosting_prepacked_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/inference-prepack/v1.1.0/",
+            "hosting_artifact_s3_data_type": "S3Prefix",
+            "hosting_artifact_compression_type": "None",
+            "hosting_neuron_model_id": "meta-textgenerationneuron-llama-2-7b-f",
+            "hosting_neuron_model_version": "1.0.0",
+            "model_kwargs": {},
+            "deploy_kwargs": {
+                "model_data_download_timeout": 1200,
+                "container_startup_health_check_timeout": 1200
+            },
+            "predictor_specs": {
+                "supported_content_types": [
+                    "application/json"
+                ],
+                "supported_accept_types": [
+                    "application/json"
+                ],
+                "default_content_type": "application/json",
+                "default_accept_type": "application/json"
+            },
+            "default_inference_instance_type": "ml.g5.12xlarge",
+            "supported_inference_instance_types": [
+                "ml.g5.12xlarge",
+                "ml.g5.24xlarge",
+                "ml.g5.2xlarge",
+                "ml.g5.48xlarge",
+                "ml.g5.4xlarge",
+                "ml.g5.8xlarge",
+                "ml.g6.12xlarge",
+                "ml.p4d.24xlarge"
+            ],
+            "hosting_instance_type_variants": {
+                "regional_aliases": {
+                    "af-south-1": {
+                        "alias_ecr_uri_1": "626614931356.dkr.ecr.af-south-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "ap-east-1": {
+                        "alias_ecr_uri_1": "871362719292.dkr.ecr.ap-east-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "ap-northeast-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "ap-northeast-2": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-2.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "ap-northeast-3": {
+                        "alias_ecr_uri_1": "364406365360.dkr.ecr.ap-northeast-3.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "ap-south-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-south-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "ap-southeast-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "ap-southeast-2": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-2.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "ap-southeast-3": {
+                        "alias_ecr_uri_1": "907027046896.dkr.ecr.ap-southeast-3.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "ca-central-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ca-central-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "ca-west-1": {
+                        "alias_ecr_uri_1": "204538143572.dkr.ecr.ca-west-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "cn-north-1": {
+                        "alias_ecr_uri_1": "727897471807.dkr.ecr.cn-north-1.amazonaws.com.cn/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "cn-northwest-1": {
+                        "alias_ecr_uri_1": "727897471807.dkr.ecr.cn-northwest-1.amazonaws.com.cn/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "eu-central-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-central-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "eu-north-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-north-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "eu-south-1": {
+                        "alias_ecr_uri_1": "692866216735.dkr.ecr.eu-south-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "eu-west-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-west-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "eu-west-2": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-west-2.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "eu-west-3": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-west-3.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "il-central-1": {
+                        "alias_ecr_uri_1": "780543022126.dkr.ecr.il-central-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "me-south-1": {
+                        "alias_ecr_uri_1": "217643126080.dkr.ecr.me-south-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "sa-east-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.sa-east-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "us-east-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "us-east-2": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-east-2.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "us-gov-east-1": {
+                        "alias_ecr_uri_1": "446045086412.dkr.ecr.us-gov-east-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "us-gov-west-1": {
+                        "alias_ecr_uri_1": "442386744353.dkr.ecr.us-gov-west-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "us-west-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-west-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    },
+                    "us-west-2": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
+                    }
+                },
+                "variants": {
+                    "g4dn": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "g5": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "g6": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "g6e": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "local_gpu": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p2": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p3": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p3dn": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p4d": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p4de": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p5": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p5e": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p5en": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p6": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p6e": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "ml.p4d.24xlarge": {
+                        "properties": {
+                            "environment_variables": {
+                                "OPTION_TENSOR_PARALLEL_DEGREE": "1"
+                            },
+                            "resource_requirements": {
+                                "min_memory_mb": 589824,
+                                "num_accelerators": 8
+                            }
+                        }
+                    },
+                    "ml.p5.48xlarge": {
+                        "properties": {
+                            "environment_variables": {
+                                "OPTION_TENSOR_PARALLEL_DEGREE": "1",
+                                "OPTION_GPU_MEMORY_UTILIZATION": "0.95"
+                            }
+                        }
+                    },
+                    "ml.g5.2xlarge": {
+                        "properties": {
+                            "resource_requirements": {
+                                "min_memory_mb": 16384,
+                                "num_accelerators": 1
+                            }
+                        }
+                    },
+                    "ml.g5.4xlarge": {
+                        "properties": {
+                            "resource_requirements": {
+                                "min_memory_mb": 32768,
+                                "num_accelerators": 1
+                            }
+                        }
+                    },
+                    "ml.g5.8xlarge": {
+                        "properties": {
+                            "resource_requirements": {
+                                "min_memory_mb": 65536,
+                                "num_accelerators": 1
+                            }
+                        }
+                    },
+                    "ml.g5.12xlarge": {
+                        "properties": {
+                            "resource_requirements": {
+                                "min_memory_mb": 98304,
+                                "num_accelerators": 4
+                            }
+                        }
+                    },
+                    "ml.g5.24xlarge": {
+                        "properties": {
+                            "resource_requirements": {
+                                "min_memory_mb": 196608,
+                                "num_accelerators": 4
+                            }
+                        }
+                    },
+                    "ml.g5.48xlarge": {
+                        "properties": {
+                            "resource_requirements": {
+                                "min_memory_mb": 393216,
+                                "num_accelerators": 8
+                            }
+                        }
+                    }
+                }
+            },
+            "inference_volume_size": 256,
+            "inference_enable_network_isolation": true,
+            "hosting_resource_requirements": {
+                "min_memory_mb": 98304,
+                "num_accelerators": 4
+            },
+            "inference_environment_variables": [
+                {
+                    "name": "SAGEMAKER_PROGRAM",
+                    "type": "text",
+                    "default": "inference.py",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "SAGEMAKER_SUBMIT_DIRECTORY",
+                    "type": "text",
+                    "default": "/opt/ml/model/code",
+                    "scope": "container",
+                    "required_for_model_class": false
+                },
+                {
+                    "name": "SAGEMAKER_CONTAINER_LOG_LEVEL",
+                    "type": "text",
+                    "default": "20",
+                    "scope": "container",
+                    "required_for_model_class": false
+                },
+                {
+                    "name": "SAGEMAKER_MODEL_SERVER_TIMEOUT",
+                    "type": "text",
+                    "default": "3600",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "ENDPOINT_SERVER_TIMEOUT",
+                    "type": "int",
+                    "default": 3600,
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "MODEL_CACHE_ROOT",
+                    "type": "text",
+                    "default": "/opt/ml/model",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "SAGEMAKER_ENV",
+                    "type": "text",
+                    "default": "1",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "HF_MODEL_ID",
+                    "type": "text",
+                    "default": "/opt/ml/model",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "OPTION_GPU_MEMORY_UTILIZATION",
+                    "type": "text",
+                    "default": "0.85",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "SAGEMAKER_MODEL_SERVER_WORKERS",
+                    "type": "int",
+                    "default": 1,
+                    "scope": "container",
+                    "required_for_model_class": true
+                }
+            ],
+            "default_payloads": {
+                "meaningOfLife": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "generated_text"
+                    },
+                    "body": {
+                        "inputs": "I believe the meaning of life is",
+                        "parameters": {
+                            "max_new_tokens": 64,
+                            "top_p": 0.9,
+                            "temperature": 0.6,
+                            "decoder_input_details": true,
+                            "details": true
+                        }
+                    }
+                },
+                "theoryOfRelativity": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "generated_text"
+                    },
+                    "body": {
+                        "inputs": "Simply put, the theory of relativity states that ",
+                        "parameters": {
+                            "max_new_tokens": 64,
+                            "top_p": 0.9,
+                            "temperature": 0.6
+                        }
+                    }
+                },
+                "teamMessage": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "generated_text"
+                    },
+                    "body": {
+                        "inputs": "A brief message congratulating the team on the launch:\n\nHi everyone,\n\nI just ",
+                        "parameters": {
+                            "max_new_tokens": 64,
+                            "top_p": 0.9,
+                            "temperature": 0.6
+                        }
+                    }
+                },
+                "englishToFrench": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "generated_text"
+                    },
+                    "body": {
+                        "inputs": "Translate English to French:\nsea otter => loutre de mer\npeppermint => menthe poivrée\nplush girafe => girafe peluche\ncheese =>",
+                        "parameters": {
+                            "max_new_tokens": 64,
+                            "top_p": 0.9,
+                            "temperature": 0.6
+                        }
+                    }
+                }
+            }
+        },
+        "lmi-optimized": {
+            "hosting_ecr_specs": {
+                "framework": "djl-lmi",
+                "framework_version": "0.28.0",
+                "py_version": "py310"
+            },
+            "hosting_script_key": "source-directory-tarballs/meta/inference/textgeneration/v1.2.3/sourcedir.tar.gz",
+            "hosting_use_script_uri": false,
+            "inference_dependencies": [],
+            "inference_vulnerable": false,
+            "inference_vulnerabilities": [],
+            "hosting_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/inference/v1.1.0/",
+            "hosting_prepacked_artifact_version": "1.1.0",
+            "hosting_prepacked_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/inference-prepack/v1.1.0/",
+            "hosting_artifact_s3_data_type": "S3Prefix",
+            "hosting_artifact_compression_type": "None",
+            "hosting_additional_data_sources": {
+                "speculative_decoding": [
+                    {
+                        "channel_name": "draft_model",
+                        "artifact_version": "v2",
+                        "s3_data_source": {
+                            "compression_type": "None",
+                            "s3_data_type": "S3Prefix",
+                            "s3_uri": "sagemaker-speculative-decoding-llama2-tiny-v2/"
+                        }
+                    }
+                ]
+            },
+            "hosting_neuron_model_id": "meta-textgenerationneuron-llama-2-7b-f",
+            "hosting_neuron_model_version": "1.0.0",
+            "model_kwargs": {},
+            "deploy_kwargs": {
+                "model_data_download_timeout": 1200,
+                "container_startup_health_check_timeout": 1200
+            },
+            "predictor_specs": {
+                "supported_content_types": [
+                    "application/json"
+                ],
+                "supported_accept_types": [
+                    "application/json"
+                ],
+                "default_content_type": "application/json",
+                "default_accept_type": "application/json"
+            },
+            "default_inference_instance_type": "ml.p4d.24xlarge",
+            "supported_inference_instance_types": [
+                "ml.g5.12xlarge",
+                "ml.g5.2xlarge",
+                "ml.g6.12xlarge",
+                "ml.g6.2xlarge",
+                "ml.p4d.24xlarge",
+                "ml.p4de.24xlarge",
+                "ml.p5.48xlarge"
+            ],
+            "hosting_instance_type_variants": {
+                "regional_aliases": {
+                    "af-south-1": {
+                        "alias_ecr_uri_1": "626614931356.dkr.ecr.af-south-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "ap-east-1": {
+                        "alias_ecr_uri_1": "871362719292.dkr.ecr.ap-east-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "ap-northeast-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "ap-northeast-2": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-2.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "ap-northeast-3": {
+                        "alias_ecr_uri_1": "364406365360.dkr.ecr.ap-northeast-3.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "ap-south-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-south-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "ap-southeast-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "ap-southeast-2": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-2.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "ap-southeast-3": {
+                        "alias_ecr_uri_1": "907027046896.dkr.ecr.ap-southeast-3.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "ca-central-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ca-central-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "ca-west-1": {
+                        "alias_ecr_uri_1": "204538143572.dkr.ecr.ca-west-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "cn-north-1": {
+                        "alias_ecr_uri_1": "727897471807.dkr.ecr.cn-north-1.amazonaws.com.cn/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "cn-northwest-1": {
+                        "alias_ecr_uri_1": "727897471807.dkr.ecr.cn-northwest-1.amazonaws.com.cn/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "eu-central-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-central-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "eu-north-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-north-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "eu-south-1": {
+                        "alias_ecr_uri_1": "692866216735.dkr.ecr.eu-south-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "eu-west-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-west-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "eu-west-2": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-west-2.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "eu-west-3": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-west-3.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "il-central-1": {
+                        "alias_ecr_uri_1": "780543022126.dkr.ecr.il-central-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "me-central-1": {
+                        "alias_ecr_uri_1": "914824155844.dkr.ecr.me-central-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "me-south-1": {
+                        "alias_ecr_uri_1": "217643126080.dkr.ecr.me-south-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "sa-east-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.sa-east-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "us-east-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "us-east-2": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-east-2.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "us-gov-east-1": {
+                        "alias_ecr_uri_1": "446045086412.dkr.ecr.us-gov-east-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "us-gov-west-1": {
+                        "alias_ecr_uri_1": "442386744353.dkr.ecr.us-gov-west-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "us-west-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-west-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    },
+                    "us-west-2": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
+                    }
+                },
+                "variants": {
+                    "g4dn": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "g5": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "g6": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "g6e": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "local_gpu": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p2": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p3": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p3dn": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p4d": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p4de": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p5": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p5e": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p5en": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p6": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "p6e": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "ml.p4d.24xlarge": {
+                        "properties": {
+                            "environment_variables": {
+                                "OPTION_TENSOR_PARALLEL_DEGREE": "1"
+                            },
+                            "resource_requirements": {
+                                "min_memory_mb": 589824,
+                                "num_accelerators": 8
+                            }
+                        }
+                    },
+                    "ml.p5.48xlarge": {
+                        "properties": {
+                            "environment_variables": {
+                                "OPTION_TENSOR_PARALLEL_DEGREE": "1",
+                                "OPTION_GPU_MEMORY_UTILIZATION": "0.95"
+                            }
+                        }
+                    },
+                    "ml.p4de.24xlarge": {
+                        "properties": {
+                            "resource_requirements": {
+                                "min_memory_mb": 589824,
+                                "num_accelerators": 8
+                            }
+                        }
+                    }
+                }
+            },
+            "inference_volume_size": 256,
+            "inference_enable_network_isolation": true,
+            "hosting_resource_requirements": {
+                "min_memory_mb": 589824,
+                "num_accelerators": 8
+            },
+            "inference_environment_variables": [
+                {
+                    "name": "SAGEMAKER_PROGRAM",
+                    "type": "text",
+                    "default": "inference.py",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "SAGEMAKER_SUBMIT_DIRECTORY",
+                    "type": "text",
+                    "default": "/opt/ml/model/code",
+                    "scope": "container",
+                    "required_for_model_class": false
+                },
+                {
+                    "name": "SAGEMAKER_CONTAINER_LOG_LEVEL",
+                    "type": "text",
+                    "default": "20",
+                    "scope": "container",
+                    "required_for_model_class": false
+                },
+                {
+                    "name": "SAGEMAKER_MODEL_SERVER_TIMEOUT",
+                    "type": "text",
+                    "default": "3600",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "ENDPOINT_SERVER_TIMEOUT",
+                    "type": "int",
+                    "default": 3600,
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "MODEL_CACHE_ROOT",
+                    "type": "text",
+                    "default": "/opt/ml/model",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "SAGEMAKER_ENV",
+                    "type": "text",
+                    "default": "1",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "HF_MODEL_ID",
+                    "type": "text",
+                    "default": "/opt/ml/model",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "OPTION_SPECULATIVE_DRAFT_MODEL",
+                    "type": "text",
+                    "default": "/opt/ml/additional-model-data-sources/draft_model",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "OPTION_GPU_MEMORY_UTILIZATION",
+                    "type": "text",
+                    "default": "0.85",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "SAGEMAKER_MODEL_SERVER_WORKERS",
+                    "type": "int",
+                    "default": 1,
+                    "scope": "container",
+                    "required_for_model_class": true
+                }
+            ],
+            "default_payloads": {
+                "meaningOfLife": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "generated_text"
+                    },
+                    "body": {
+                        "inputs": "I believe the meaning of life is",
+                        "parameters": {
+                            "max_new_tokens": 64,
+                            "top_p": 0.9,
+                            "temperature": 0.6
+                        }
+                    }
+                },
+                "theoryOfRelativity": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "generated_text"
+                    },
+                    "body": {
+                        "inputs": "Simply put, the theory of relativity states that ",
+                        "parameters": {
+                            "max_new_tokens": 64,
+                            "top_p": 0.9,
+                            "temperature": 0.6
+                        }
+                    }
+                },
+                "teamMessage": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "generated_text"
+                    },
+                    "body": {
+                        "inputs": "A brief message congratulating the team on the launch:\n\nHi everyone,\n\nI just ",
+                        "parameters": {
+                            "max_new_tokens": 64,
+                            "top_p": 0.9,
+                            "temperature": 0.6
+                        }
+                    }
+                },
+                "englishToFrench": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "generated_text"
+                    },
+                    "body": {
+                        "inputs": "Translate English to French:\nsea otter => loutre de mer\npeppermint => menthe poivrée\nplush girafe => girafe peluche\ncheese =>",
+                        "parameters": {
+                            "max_new_tokens": 64,
+                            "top_p": 0.9,
+                            "temperature": 0.6
+                        }
+                    }
+                }
+            }
+        },
+        "neuron": {
+            "hosting_ecr_specs": {
+                "framework": "djl-neuronx",
+                "framework_version": "0.24.0",
+                "py_version": "py39"
+            },
+            "hosting_script_key": "source-directory-tarballs/meta/inference/textgenerationneuron/v1.0.0/sourcedir.tar.gz",
+            "hosting_use_script_uri": false,
+            "inference_dependencies": [
+                "sagemaker_jumpstart_huggingface_script_utilities==1.0.8",
+                "sagemaker_jumpstart_script_utilities==1.1.8"
+            ],
+            "inference_vulnerable": false,
+            "inference_vulnerabilities": [],
+            "hosting_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/neuron/inference/v1.0.0/",
+            "hosting_prepacked_artifact_version": "1.0.0",
+            "hosting_prepacked_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/neuron/inference-prepack/v1.0.0/",
+            "hosting_artifact_s3_data_type": "S3Prefix",
+            "hosting_artifact_compression_type": "None",
+            "hosting_neuron_model_id": "meta-textgeneration-llama-2-7b-f",
+            "hosting_neuron_model_version": "1.0.0",
+            "model_kwargs": {},
+            "deploy_kwargs": {
+                "model_data_download_timeout": 3600,
+                "container_startup_health_check_timeout": 3600
+            },
+            "predictor_specs": {
+                "supported_content_types": [
+                    "application/json"
+                ],
+                "supported_accept_types": [
+                    "application/json"
+                ],
+                "default_content_type": "application/json",
+                "default_accept_type": "application/json"
+            },
+            "default_inference_instance_type": "ml.inf2.xlarge",
+            "supported_inference_instance_types": [
+                "ml.inf2.xlarge",
+                "ml.inf2.8xlarge",
+                "ml.inf2.24xlarge",
+                "ml.inf2.48xlarge"
+            ],
+            "hosting_instance_type_variants": {
+                "regional_aliases": {
+                    "ap-northeast-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
+                    },
+                    "ap-south-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-south-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
+                    },
+                    "ap-southeast-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
+                    },
+                    "ap-southeast-2": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-2.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
+                    },
+                    "ap-southeast-5": {
+                        "alias_ecr_uri_1": "550225433462.dkr.ecr.ap-southeast-5.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
+                    },
+                    "ap-southeast-7": {
+                        "alias_ecr_uri_1": "590183813437.dkr.ecr.ap-southeast-7.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
+                    },
+                    "ca-west-1": {
+                        "alias_ecr_uri_1": "204538143572.dkr.ecr.ca-west-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
+                    },
+                    "eu-central-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-central-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
+                    },
+                    "eu-central-2": {
+                        "alias_ecr_uri_1": "380420809688.dkr.ecr.eu-central-2.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
+                    },
+                    "eu-west-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-west-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
+                    },
+                    "eu-west-3": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-west-3.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
+                    },
+                    "mx-central-1": {
+                        "alias_ecr_uri_1": "637423239942.dkr.ecr.mx-central-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
+                    },
+                    "sa-east-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.sa-east-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
+                    },
+                    "us-east-1": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
+                    },
+                    "us-east-2": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-east-2.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
+                    },
+                    "us-west-2": {
+                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
+                    }
+                },
+                "variants": {
+                    "inf2": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "trn1": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "trn1n": {
+                        "regional_properties": {
+                            "image_uri": "$alias_ecr_uri_1"
+                        }
+                    },
+                    "ml.inf2.xlarge": {
+                        "properties": {
+                            "environment_variables": {
+                                "OPTION_TENSOR_PARALLEL_DEGREE": "2",
+                                "OPTION_N_POSITIONS": "1024",
+                                "OPTION_DTYPE": "fp16",
+                                "OPTION_ROLLING_BATCH": "auto",
+                                "OPTION_MAX_ROLLING_BATCH_SIZE": "1",
+                                "OPTION_NEURON_OPTIMIZE_LEVEL": "2"
+                            },
+                            "resource_requirements": {
+                                "min_memory_mb": 8192,
+                                "num_accelerators": 1
+                            }
+                        }
+                    },
+                    "ml.inf2.8xlarge": {
+                        "properties": {
+                            "environment_variables": {
+                                "OPTION_TENSOR_PARALLEL_DEGREE": "2",
+                                "OPTION_N_POSITIONS": "2048",
+                                "OPTION_DTYPE": "fp16",
+                                "OPTION_ROLLING_BATCH": "auto",
+                                "OPTION_MAX_ROLLING_BATCH_SIZE": "4",
+                                "OPTION_NEURON_OPTIMIZE_LEVEL": "2"
+                            },
+                            "resource_requirements": {
+                                "min_memory_mb": 65536,
+                                "num_accelerators": 1
+                            }
+                        }
+                    },
+                    "ml.inf2.24xlarge": {
+                        "properties": {
+                            "environment_variables": {
+                                "OPTION_TENSOR_PARALLEL_DEGREE": "12",
+                                "OPTION_N_POSITIONS": "4096",
+                                "OPTION_DTYPE": "fp16",
+                                "OPTION_ROLLING_BATCH": "auto",
+                                "OPTION_MAX_ROLLING_BATCH_SIZE": "4",
+                                "OPTION_NEURON_OPTIMIZE_LEVEL": "2"
+                            },
+                            "resource_requirements": {
+                                "min_memory_mb": 196608,
+                                "num_accelerators": 6
+                            }
+                        }
+                    },
+                    "ml.inf2.48xlarge": {
+                        "properties": {
+                            "environment_variables": {
+                                "OPTION_TENSOR_PARALLEL_DEGREE": "24",
+                                "OPTION_N_POSITIONS": "4096",
+                                "OPTION_DTYPE": "fp16",
+                                "OPTION_ROLLING_BATCH": "auto",
+                                "OPTION_MAX_ROLLING_BATCH_SIZE": "4",
+                                "OPTION_NEURON_OPTIMIZE_LEVEL": "2"
+                            },
+                            "resource_requirements": {
+                                "min_memory_mb": 393216,
+                                "num_accelerators": 12
+                            }
+                        }
+                    }
+                }
+            },
+            "inference_volume_size": 256,
+            "inference_enable_network_isolation": false,
+            "hosting_resource_requirements": {
+                "min_memory_mb": 8192,
+                "num_accelerators": 1
+            },
+            "inference_environment_variables": [
+                {
+                    "name": "SAGEMAKER_PROGRAM",
+                    "type": "text",
+                    "default": "inference.py",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "SAGEMAKER_SUBMIT_DIRECTORY",
+                    "type": "text",
+                    "default": "/opt/ml/model/code",
+                    "scope": "container",
+                    "required_for_model_class": false
+                },
+                {
+                    "name": "SAGEMAKER_CONTAINER_LOG_LEVEL",
+                    "type": "text",
+                    "default": "20",
+                    "scope": "container",
+                    "required_for_model_class": false
+                },
+                {
+                    "name": "SAGEMAKER_MODEL_SERVER_TIMEOUT",
+                    "type": "text",
+                    "default": "3600",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "ENDPOINT_SERVER_TIMEOUT",
+                    "type": "int",
+                    "default": 3600,
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "MODEL_CACHE_ROOT",
+                    "type": "text",
+                    "default": "/opt/ml/model",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "SAGEMAKER_ENV",
+                    "type": "text",
+                    "default": "1",
+                    "scope": "container",
+                    "required_for_model_class": true
+                },
+                {
+                    "name": "SAGEMAKER_MODEL_SERVER_WORKERS",
+                    "type": "int",
+                    "default": 1,
+                    "scope": "container",
+                    "required_for_model_class": true
+                }
+            ],
+            "default_payloads": {
+                "mayonnaise": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "generated_text"
+                    },
+                    "body": {
+                        "inputs": "<s>[INST] what is the recipe of mayonnaise? [/INST] ",
+                        "parameters": {
+                            "max_new_tokens": 256,
+                            "top_p": 0.9,
+                            "temperature": 0.6
+                        }
+                    }
+                },
+                "parisTrip": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "generated_text"
+                    },
+                    "body": {
+                        "inputs": "<s>[INST] I am going to Paris, what should I see? [/INST] Paris, the capital of France, is known for its stunning architecture, art museums, historical landmarks, and romantic atmosphere. Here are some of the top attractions to see in Paris:\n\n1. The Eiffel Tower: The iconic Eiffel Tower is one of the most recognizable landmarks in the world and offers breathtaking views of the city.\n2. The Louvre Museum: The Louvre is one of the world's largest and most famous museums, housing an impressive collection of art and artifacts, including the Mona Lisa.\n3. Notre-Dame Cathedral: This beautiful cathedral is one of the most famous landmarks in Paris and is known for its Gothic architecture and stunning stained glass windows.\n\nThese are just a few of the many attractions that Paris has to offer. With so much to see and do, it's no wonder that Paris is one of the most popular tourist destinations in the world.</s><s>[INST] What is so great about #1? [/INST] ",
+                        "parameters": {
+                            "max_new_tokens": 256,
+                            "top_p": 0.9,
+                            "temperature": 0.6
+                        }
+                    }
+                },
+                "parisHaiku": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "generated_text"
+                    },
+                    "body": {
+                        "inputs": "<s>[INST] <<SYS>>\nAlways answer with Haiku\n<</SYS>>\n\nI am going to Paris, what should I see? [/INST] ",
+                        "parameters": {
+                            "max_new_tokens": 256,
+                            "top_p": 0.9,
+                            "temperature": 0.6
+                        }
+                    }
+                },
+                "emojisBeijing": {
+                    "content_type": "application/json",
+                    "prompt_key": "inputs",
+                    "output_keys": {
+                        "generated_text": "generated_text"
+                    },
+                    "body": {
+                        "inputs": "<s>[INST] <<SYS>>\nAlways answer with detailed instruction\n<</SYS>>\n\nHow to go from Beijing to NY? [/INST] ",
+                        "parameters": {
+                            "max_new_tokens": 256,
+                            "top_p": 0.9,
+                            "temperature": 0.6
+                        }
+                    }
+                }
+            }
+        }
+    },
+    "inference_config_rankings": {
+        "overall": {
+            "description": "default",
+            "rankings": [
+                "tgi",
+                "lmi",
+                "lmi-optimized",
+                "neuron"
+            ]
+        }
+    },
+    "hosting_neuron_model_id": "meta-textgenerationneuron-llama-2-7b-f",
+    "hosting_neuron_model_version": "1.0.0"
+}
\ No newline at end of file
diff --git a/src/sagemaker/jumpstart/factory/model.py b/src/sagemaker/jumpstart/factory/model.py
index c5dab29c16..bb60db68c5 100644
--- a/src/sagemaker/jumpstart/factory/model.py
+++ b/src/sagemaker/jumpstart/factory/model.py
@@ -929,12 +929,6 @@ def get_init_kwargs(
 
     model_init_kwargs = _add_vulnerable_and_deprecated_status_to_kwargs(kwargs=model_init_kwargs)
     model_init_kwargs = _add_model_version_to_kwargs(kwargs=model_init_kwargs)
-
-    # Add instance type before config selection so config compatibility can be checked
-    model_init_kwargs = _add_instance_type_to_kwargs(
-        kwargs=model_init_kwargs, disable_instance_type_logging=disable_instance_type_logging
-    )
-
     model_init_kwargs = _add_config_name_to_init_kwargs(kwargs=model_init_kwargs)
 
     model_init_kwargs = _add_sagemaker_session_with_custom_user_agent_to_kwargs(
@@ -944,6 +938,10 @@ def get_init_kwargs(
 
     model_init_kwargs = _add_model_name_to_kwargs(kwargs=model_init_kwargs)
 
+    model_init_kwargs = _add_instance_type_to_kwargs(
+        kwargs=model_init_kwargs, disable_instance_type_logging=disable_instance_type_logging
+    )
+
     model_init_kwargs = _add_image_uri_to_kwargs(kwargs=model_init_kwargs)
 
     if hub_arn:
diff --git a/src/sagemaker/jumpstart/types.py b/src/sagemaker/jumpstart/types.py
index b0cea6eb02..379d1c0545 100644
--- a/src/sagemaker/jumpstart/types.py
+++ b/src/sagemaker/jumpstart/types.py
@@ -1724,7 +1724,12 @@ def get_top_config_from_ranking(
         for config_name in ranked_config_names:
             resolved_config = self.configs[config_name].resolved_config
             if instance_type:
-                supported_instance_types = getattr(resolved_config, instance_type_attribute, [])
+                # Handle both dict and object types for resolved_config
+                if isinstance(resolved_config, dict):
+                    supported_instance_types = resolved_config.get(instance_type_attribute, [])
+                else:
+                    supported_instance_types = getattr(resolved_config, instance_type_attribute, [])
+
                 if supported_instance_types and instance_type not in supported_instance_types:
                     continue
             return self.configs[config_name]
diff --git a/src/sagemaker/jumpstart/utils.py b/src/sagemaker/jumpstart/utils.py
index b7c22bbda6..6396390fbd 100644
--- a/src/sagemaker/jumpstart/utils.py
+++ b/src/sagemaker/jumpstart/utils.py
@@ -1237,10 +1237,6 @@ def get_top_ranked_config_name(
 ) -> Optional[str]:
     """Returns the top ranked config name for the given model ID and region.
 
-    Args:
-        instance_type (Optional[str]): The instance type to filter configs by compatibility.
-            If provided, only configs that support this instance type will be considered.
-
     Raises:
         ValueError: If the script scope is not supported by JumpStart.
     """
diff --git a/test_unified_model_card.py b/test_unified_model_card.py
new file mode 100644
index 0000000000..475d46afdb
--- /dev/null
+++ b/test_unified_model_card.py
@@ -0,0 +1,193 @@
+#!/usr/bin/env python3
+
+import json
+import os
+import sys
+import boto3
+import time
+from datetime import datetime
+# from urllib.parse import urlparse
+from unittest.mock import patch
+
+os.environ['HUGGING_FACE_HUB_TOKEN'] = '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'
+os.environ['AWS_DEFAULT_REGION'] = 'us-west-2'
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
+
+from sagemaker.jumpstart.model import JumpStartModel
+from sagemaker.jumpstart.types import JumpStartModelSpecs
+from sagemaker.jumpstart.enums import JumpStartModelType
+
+
+def check_aws_account():
+    """Check which AWS account and region we're using."""
+    try:
+        sts_client = boto3.client('sts')
+        identity = sts_client.get_caller_identity()
+        
+        account_id = identity['Account']
+        user_arn = identity['Arn']
+        region = boto3.Session().region_name or 'us-west-2'
+        
+        print(f" AWS Account: {account_id}")
+        print(f" User/Role: {user_arn}")
+        print(f" Region: {region}")
+        print()
+        
+        return account_id, region
+    except Exception as e:
+        print(f" Error checking AWS account: {e}")
+        return None, None
+
+
+def monitor_endpoint(endpoint_name, region='us-west-2'):
+    """Monitor endpoint deployment progress."""
+    sagemaker_client = boto3.client('sagemaker', region_name=region)
+    
+    print(f" Monitoring endpoint: {endpoint_name}")
+    start_time = time.time()
+    
+    while True:
+        try:
+            response = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
+            status = response['EndpointStatus']
+            elapsed = int(time.time() - start_time)
+            
+            print(f"  [{elapsed//60}m {elapsed%60}s] {endpoint_name}: {status}")
+            
+            if status == 'InService':
+                print(f" {endpoint_name} is ready! (took {elapsed//60}m {elapsed%60}s)")
+                break
+            elif status == 'Failed':
+                print(f" {endpoint_name} deployment failed!")
+                print(f"Failure reason: {response.get('FailureReason', 'Unknown')}")
+                break
+                
+        except Exception as e:
+            print(f"Error checking {endpoint_name}: {e}")
+            
+        time.sleep(30)  # Check every 30 seconds
+
+def load_custom_spec():
+    """Load the custom spec file from src/sagemaker directory."""
+    spec_path = os.path.join(os.path.dirname(__file__), 'specfileex')
+    with open(spec_path, 'r') as f:
+        return json.load(f)
+
+
+# Check AWS account 
+account_id, region = check_aws_account()
+
+custom_spec = load_custom_spec()
+mock_specs = JumpStartModelSpecs(custom_spec)
+
+with patch('sagemaker.jumpstart.cache.JumpStartModelsCache.get_specs') as mock_get_specs, \
+     patch('sagemaker.jumpstart.utils.validate_model_id_and_get_type') as mock_validate_model:
+    
+    mock_get_specs.return_value = mock_specs
+    mock_validate_model.return_value = JumpStartModelType.OPEN_WEIGHTS
+    
+    model_id = "meta-textgeneration-llama-2-7b-f"
+    model_version = "4.19.0"
+    accept_eula = False
+    
+    # Create unique endpoint names with timestamp
+    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+    neuron_endpoint_name = f"llama-neuron-{timestamp}"
+    gpu_endpoint_name = f"llama-gpu-{timestamp}"
+    
+    print(f" Neuron endpoint: {neuron_endpoint_name}")
+    print(f" GPU endpoint: {gpu_endpoint_name}")
+    print()
+
+    
+    model_neuron = JumpStartModel(
+        model_id=model_id, 
+        model_version=model_version,
+        instance_type="ml.inf2.24xlarge",
+        env={"HUGGING_FACE_HUB_TOKEN": "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"}
+    )
+
+    # Modify to use alpha us-west-2 bucket
+    original_neuron_uri = model_neuron.model_data['S3DataSource']['S3Uri']
+    # Replace with alpha us-west-2 bucket (handle both east-1 and west-2 original buckets)
+    alpha_neuron_uri = original_neuron_uri.replace('jumpstart-private-cache-prod-us-east-1', 'jumpstart-private-cache-alpha-us-west-2')
+    alpha_neuron_uri = alpha_neuron_uri.replace('jumpstart-private-cache-prod-us-west-2', 'jumpstart-private-cache-alpha-us-west-2')
+    # Also handle regular cache buckets (without "private")
+    alpha_neuron_uri = alpha_neuron_uri.replace('jumpstart-cache-prod-us-east-1', 'jumpstart-cache-alpha-us-west-2')
+    alpha_neuron_uri = alpha_neuron_uri.replace('jumpstart-cache-prod-us-west-2', 'jumpstart-cache-alpha-us-west-2')
+    model_neuron.model_data['S3DataSource']['S3Uri'] = alpha_neuron_uri
+    print(f"Original neuron URI: {original_neuron_uri}")
+    print(f"Alpha neuron URI: {alpha_neuron_uri}")
+    print(model_neuron.model_data)
+    neuron_location = model_neuron.model_data['S3DataSource']['S3Uri']
+    print(f"Neuron location: {neuron_location}")
+
+    print("Deploying neuron model...")
+    neuron_predictor = model_neuron.deploy(
+        initial_instance_count=1,
+        instance_type="ml.inf2.24xlarge",
+        endpoint_name=neuron_endpoint_name,
+        accept_eula=True,
+        wait=False 
+    )
+    
+    # Monitor neuron deployment
+    monitor_endpoint(neuron_endpoint_name, 'us-west-2')
+
+
+
+    model_gpu = JumpStartModel(
+        model_id=model_id, 
+        model_version=model_version, 
+        instance_type="ml.g5.12xlarge",
+        env={"HUGGING_FACE_HUB_TOKEN": "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"}
+    )
+
+    # Modify to use alpha us-west-2 bucket  
+    original_gpu_uri = model_gpu.model_data['S3DataSource']['S3Uri']
+    # Replace with alpha us-west-2 bucket (handle both east-1 and west-2 original buckets)
+    alpha_gpu_uri = original_gpu_uri.replace('jumpstart-private-cache-prod-us-east-1', 'jumpstart-private-cache-alpha-us-west-2')
+    alpha_gpu_uri = alpha_gpu_uri.replace('jumpstart-private-cache-prod-us-west-2', 'jumpstart-private-cache-alpha-us-west-2')
+    # Also handle regular cache buckets (without "private")
+    alpha_gpu_uri = alpha_gpu_uri.replace('jumpstart-cache-prod-us-east-1', 'jumpstart-cache-alpha-us-west-2')
+    alpha_gpu_uri = alpha_gpu_uri.replace('jumpstart-cache-prod-us-west-2', 'jumpstart-cache-alpha-us-west-2')
+    model_gpu.model_data['S3DataSource']['S3Uri'] = alpha_gpu_uri
+    print(f"Original GPU URI: {original_gpu_uri}")
+    print(f"Alpha GPU URI: {alpha_gpu_uri}")
+    print(model_gpu.model_data)
+    gpu_location = model_gpu.model_data['S3DataSource']['S3Uri']
+    print(f"GPU location: {gpu_location}")
+
+    print("Deploying GPU model...")
+    gpu_predictor = model_gpu.deploy(
+        initial_instance_count=1,
+        instance_type="ml.g5.12xlarge",
+        endpoint_name=gpu_endpoint_name,
+        accept_eula=True,
+        wait=False  
+    )
+    
+    # Monitor GPU deployment  
+    monitor_endpoint(gpu_endpoint_name, 'us-west-2')
+
+    test_payload = {
+        "inputs": "The meaning of life is",
+        "parameters": {
+            "max_new_tokens": 50,
+            "temperature": 0.7
+        }
+    }
+
+    print("Testing neuron endpoint...")
+    neuron_response = neuron_predictor.predict(test_payload)
+    print(f"Neuron response: {neuron_response}")
+
+    print("Testing GPU endpoint...")
+    gpu_response = gpu_predictor.predict(test_payload)
+    print(f"GPU response: {gpu_response}")
+
+
+    #print("Cleaning up endpoints...")
+    #neuron_predictor.delete_endpoint()
+    #gpu_predictor.delete_endpoint()

From 2976c5cd585e0b952acea068ae7b8f87751ed3a6 Mon Sep 17 00:00:00 2001
From: Tanvika Boyineni <tanvikab@amazon.com>
Date: Wed, 6 Aug 2025 15:21:51 -0700
Subject: [PATCH 3/4] tests: testing changes

---
 .../jumpstart/test_auto_config_resolution.py  | 236 ++++++++++++++++++
 1 file changed, 236 insertions(+)
 create mode 100644 tests/unit/sagemaker/jumpstart/test_auto_config_resolution.py

diff --git a/tests/unit/sagemaker/jumpstart/test_auto_config_resolution.py b/tests/unit/sagemaker/jumpstart/test_auto_config_resolution.py
new file mode 100644
index 0000000000..05d5dbc8df
--- /dev/null
+++ b/tests/unit/sagemaker/jumpstart/test_auto_config_resolution.py
@@ -0,0 +1,236 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+import pytest
+from unittest.mock import patch, Mock
+from sagemaker.jumpstart import utils
+from sagemaker.jumpstart.enums import JumpStartScriptScope, JumpStartConfigRankingName
+from sagemaker.jumpstart.factory.estimator import _add_config_name_to_kwargs
+from sagemaker.jumpstart.factory.model import (
+    _add_config_name_to_init_kwargs,
+    _add_config_name_to_deploy_kwargs,
+)
+from sagemaker.jumpstart.types import JumpStartEstimatorInitKwargs, JumpStartModelInitKwargs
+
+
+class TestAutoConfigResolution:
+    """Test auto resolution of config names based on instance type."""
+
+    def create_mock_configs(self, scope):
+        """Create mock configs for testing with different supported instance types."""
+        # Mock the config object structure
+        config1 = Mock()
+        config1.config_name = "config1"
+        config1.resolved_config = {
+            "supported_inference_instance_types": ["ml.g5.xlarge", "ml.g5.2xlarge"]
+            if scope == JumpStartScriptScope.INFERENCE
+            else [],
+            "supported_training_instance_types": ["ml.g5.xlarge", "ml.g5.2xlarge"]
+            if scope == JumpStartScriptScope.TRAINING
+            else [],
+        }
+        
+        config2 = Mock()
+        config2.config_name = "config2"
+        config2.resolved_config = {
+            "supported_inference_instance_types": ["ml.p4d.24xlarge", "ml.p5.48xlarge"]
+            if scope == JumpStartScriptScope.INFERENCE
+            else [],
+            "supported_training_instance_types": ["ml.p4d.24xlarge", "ml.p5.48xlarge"]
+            if scope == JumpStartScriptScope.TRAINING
+            else [],
+        }
+        
+        # Config with no instance type restrictions
+        config3 = Mock()
+        config3.config_name = "config3"
+        config3.resolved_config = {
+            "supported_inference_instance_types": []
+            if scope == JumpStartScriptScope.INFERENCE
+            else [],
+            "supported_training_instance_types": []
+            if scope == JumpStartScriptScope.TRAINING
+            else [],
+        }
+
+        # Mock config rankings
+        ranking = Mock()
+        ranking.rankings = ["config1", "config2", "config3"]
+
+        # Mock the metadata configs container
+        configs = Mock()
+        configs.scope = scope
+        configs.configs = {
+            "config1": config1,
+            "config2": config2,
+            "config3": config3,
+        }
+        configs.config_rankings = {JumpStartConfigRankingName.DEFAULT: ranking}
+
+        # Import the actual get_top_config_from_ranking method so we can test it
+        from sagemaker.jumpstart.types import JumpStartMetadataConfigs
+        configs.get_top_config_from_ranking = JumpStartMetadataConfigs.get_top_config_from_ranking.__get__(configs)
+
+        return configs
+
+    def test_get_top_config_from_ranking_with_matching_instance_type(self):
+        """Test that get_top_config_from_ranking returns config that supports the instance type."""
+        configs = self.create_mock_configs(JumpStartScriptScope.INFERENCE)
+        
+        # Test with instance type that matches config1
+        result = configs.get_top_config_from_ranking(instance_type="ml.g5.xlarge")
+        assert result is not None
+        assert result.config_name == "config1"
+        
+        # Test with instance type that matches config2
+        result = configs.get_top_config_from_ranking(instance_type="ml.p4d.24xlarge")
+        assert result is not None
+        assert result.config_name == "config2"
+
+    def test_get_top_config_from_ranking_with_no_matching_instance_type(self):
+        """Test behavior when no config supports the requested instance type."""
+        configs = self.create_mock_configs(JumpStartScriptScope.INFERENCE)
+        
+        # Test with instance type that doesn't match any config
+        result = configs.get_top_config_from_ranking(instance_type="ml.m5.xlarge")
+        assert result is not None
+        assert result.config_name == "config3"  # Should fall back to config with no restrictions
+
+    def test_get_top_config_from_ranking_without_instance_type(self):
+        """Test that get_top_config_from_ranking returns first ranked config when no instance type specified."""
+        configs = self.create_mock_configs(JumpStartScriptScope.INFERENCE)
+        
+        result = configs.get_top_config_from_ranking()
+        assert result is not None
+        assert result.config_name == "config1"  # First in ranking
+
+    def test_get_top_config_from_ranking_training_scope(self):
+        """Test get_top_config_from_ranking with training scope."""
+        configs = self.create_mock_configs(JumpStartScriptScope.TRAINING)
+        
+        # Test with training instance type
+        result = configs.get_top_config_from_ranking(instance_type="ml.g5.xlarge")
+        assert result is not None
+        assert result.config_name == "config1"
+
+    def test_get_top_config_from_ranking_with_object_resolved_config(self):
+        """Test get_top_config_from_ranking when resolved_config is an object (not dict)."""
+        # Create a mock object with getattr support
+        mock_resolved_config = Mock()
+        mock_resolved_config.supported_inference_instance_types = ["ml.g5.xlarge"]
+        
+        config = Mock()
+        config.config_name = "test_config"
+        config.resolved_config = mock_resolved_config
+        
+        ranking = Mock()
+        ranking.rankings = ["test_config"]
+        
+        configs = Mock()
+        configs.scope = JumpStartScriptScope.INFERENCE
+        configs.configs = {"test_config": config}
+        configs.config_rankings = {JumpStartConfigRankingName.DEFAULT: ranking}
+        
+        # Import the actual method
+        from sagemaker.jumpstart.types import JumpStartMetadataConfigs
+        configs.get_top_config_from_ranking = JumpStartMetadataConfigs.get_top_config_from_ranking.__get__(configs)
+        
+        result = configs.get_top_config_from_ranking(instance_type="ml.g5.xlarge")
+        assert result is not None
+        assert result.config_name == "test_config"
+
+    def test_get_top_config_from_ranking_empty_supported_instance_types(self):
+        """Test behavior when config has empty supported_instance_types list."""
+        config = Mock()
+        config.config_name = "empty_config"
+        config.resolved_config = {
+            "supported_inference_instance_types": [],
+        }
+        
+        ranking = Mock()
+        ranking.rankings = ["empty_config"]
+        
+        configs = Mock()
+        configs.scope = JumpStartScriptScope.INFERENCE
+        configs.configs = {"empty_config": config}
+        configs.config_rankings = {JumpStartConfigRankingName.DEFAULT: ranking}
+        
+        # Import the actual method
+        from sagemaker.jumpstart.types import JumpStartMetadataConfigs
+        configs.get_top_config_from_ranking = JumpStartMetadataConfigs.get_top_config_from_ranking.__get__(configs)
+        
+        # Should return config even with empty list (no restrictions)
+        result = configs.get_top_config_from_ranking(instance_type="ml.g5.xlarge")
+        assert result is not None
+        assert result.config_name == "empty_config"
+
+    def test_instance_type_parameter_signature(self):
+        """Test that get_top_ranked_config_name function accepts instance_type parameter."""
+        # Import and inspect the function signature
+        import inspect
+        from typing import Optional
+        sig = inspect.signature(utils.get_top_ranked_config_name)
+        
+        # Verify that instance_type parameter exists in the signature
+        assert "instance_type" in sig.parameters
+        
+        # Verify it's an optional parameter with None default
+        instance_type_param = sig.parameters["instance_type"]
+        assert instance_type_param.default is None
+        assert instance_type_param.annotation == Optional[str]
+
+    def test_get_top_config_from_ranking_preserves_existing_config_name(self):
+        """Test that existing config_name is preserved when already specified."""
+        mock_get_config = Mock(return_value="auto_selected")
+        # Patch the reference bound in the estimator factory, not the one in utils.
+        target = "sagemaker.jumpstart.factory.estimator.get_top_ranked_config_name"
+        with patch(target, mock_get_config):
+            kwargs = JumpStartEstimatorInitKwargs(
+                model_id="test-model",
+                instance_type="ml.g5.xlarge",
+                config_name="user_specified_config",
+            )
+
+            result = _add_config_name_to_kwargs(kwargs)
+            # Should not call get_top_ranked_config_name when config_name already exists
+            mock_get_config.assert_not_called()
+            assert result.config_name == "user_specified_config"
+
+    def test_config_ranking_respects_priority_with_instance_type_filter(self):
+        """Test that config ranking priority is respected when filtering by instance type."""
+        # Create configs where config2 is ranked higher but config1 matches instance type
+        config1 = Mock()
+        config1.config_name = "config1"
+        config1.resolved_config = {"supported_inference_instance_types": ["ml.g5.xlarge"]}
+
+        config2 = Mock()
+        config2.config_name = "config2"
+        config2.resolved_config = {"supported_inference_instance_types": ["ml.p4d.24xlarge"]}
+
+        # Rank config2 higher than config1
+        ranking = Mock()
+        ranking.rankings = ["config2", "config1"]
+
+        configs = Mock()
+        configs.scope = JumpStartScriptScope.INFERENCE
+        configs.configs = {"config1": config1, "config2": config2}
+        configs.config_rankings = {JumpStartConfigRankingName.DEFAULT: ranking}
+
+        # Import the actual method
+        from sagemaker.jumpstart.types import JumpStartMetadataConfigs
+        configs.get_top_config_from_ranking = JumpStartMetadataConfigs.get_top_config_from_ranking.__get__(configs)
+
+        # Even though config2 is ranked higher, config1 should be returned because it matches instance type
+        result = configs.get_top_config_from_ranking(instance_type="ml.g5.xlarge")
+        assert result is not None
+        assert result.config_name == "config1"

From 934123d1b638f68bce210b11fade971a3dbe60b6 Mon Sep 17 00:00:00 2001
From: Tanvika Boyineni <tanvikab@amazon.com>
Date: Thu, 7 Aug 2025 11:11:37 -0700
Subject: [PATCH 4/4] chore: clean up files

---
 specfileex                 | 2960 ------------------------------------
 test_unified_model_card.py |  193 ---
 2 files changed, 3153 deletions(-)
 delete mode 100644 specfileex
 delete mode 100644 test_unified_model_card.py

diff --git a/specfileex b/specfileex
deleted file mode 100644
index e2d15647d0..0000000000
--- a/specfileex
+++ /dev/null
@@ -1,2960 +0,0 @@
-{
-    "model_id": "meta-textgeneration-llama-2-7b-f",
-    "provider": "meta",
-    "url": "https://ai.meta.com/resources/models-and-libraries/llama-downloads/",
-    "version": "4.19.0",
-    "min_sdk_version": "2.225.0",
-    "training_supported": true,
-    "incremental_training_supported": true,
-    "hosting_ecr_specs": {
-        "framework": "huggingface-llm",
-        "framework_version": "2.0.0",
-        "py_version": "py310"
-    },
-    "hosting_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/inference/v1.1.0/",
-    "hosting_script_key": "source-directory-tarballs/meta/inference/textgeneration/v1.2.3/sourcedir.tar.gz",
-    "hosting_prepacked_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/inference-prepack/v1.1.0/",
-    "hosting_prepacked_artifact_version": "1.1.0",
-    "hosting_use_script_uri": false,
-    "hosting_eula_key": "fmhMetadata/eula/llamaEula.txt",
-    "inference_vulnerable": false,
-    "inference_dependencies": [],
-    "inference_vulnerabilities": [],
-    "training_vulnerable": false,
-    "training_dependencies": [
-        "accelerate==0.33.0",
-        "bitsandbytes==0.39.1",
-        "black==23.7.0",
-        "brotli==1.0.9",
-        "datasets==2.14.1",
-        "docstring-parser==0.16",
-        "fire==0.5.0",
-        "huggingface-hub==0.24.2",
-        "inflate64==0.3.1",
-        "loralib==0.1.1",
-        "multivolumefile==0.2.3",
-        "mypy-extensions==1.0.0",
-        "nvidia-cublas-cu12==12.1.3.1",
-        "nvidia-cuda-cupti-cu12==12.1.105",
-        "nvidia-cuda-nvrtc-cu12==12.1.105",
-        "nvidia-cuda-runtime-cu12==12.1.105",
-        "nvidia-cudnn-cu12==8.9.2.26",
-        "nvidia-cufft-cu12==11.0.2.54",
-        "nvidia-curand-cu12==10.3.2.106",
-        "nvidia-cusolver-cu12==11.4.5.107",
-        "nvidia-cusolver-cu12==11.4.5.107",
-        "nvidia-cusparse-cu12==12.1.0.106",
-        "nvidia-nccl-cu12==2.19.3",
-        "nvidia-nvjitlink-cu12==12.3.101",
-        "nvidia-nvtx-cu12==12.1.105",
-        "pathspec==0.11.1",
-        "peft==0.4.0",
-        "py7zr==0.20.5",
-        "pybcj==1.0.1",
-        "pycryptodomex==3.18.0",
-        "pyppmd==1.0.0",
-        "pyzstd==0.15.9",
-        "safetensors==0.4.2",
-        "sagemaker_jumpstart_huggingface_script_utilities==1.2.7",
-        "sagemaker_jumpstart_script_utilities==1.1.9",
-        "scipy==1.11.1",
-        "shtab==1.7.1",
-        "termcolor==2.3.0",
-        "texttable==1.6.7",
-        "tokenize-rt==5.1.0",
-        "tokenizers==0.19.1",
-        "torch==2.2.0",
-        "transformers==4.43.1",
-        "triton==2.2.0",
-        "trl==0.8.1",
-        "typing-extensions==4.8.0",
-        "tyro==0.7.3"
-    ],
-    "training_vulnerabilities": [],
-    "deprecated": false,
-    "hyperparameters": [
-        {
-            "name": "int8_quantization",
-            "type": "text",
-            "default": "False",
-            "options": [
-                "True",
-                "False"
-            ],
-            "scope": "algorithm"
-        },
-        {
-            "name": "enable_fsdp",
-            "type": "text",
-            "default": "True",
-            "options": [
-                "True",
-                "False"
-            ],
-            "scope": "algorithm"
-        },
-        {
-            "name": "epoch",
-            "type": "int",
-            "default": 1,
-            "min": 1,
-            "max": 1000,
-            "scope": "algorithm"
-        },
-        {
-            "name": "learning_rate",
-            "type": "float",
-            "default": 0.0001,
-            "min": 1e-08,
-            "max": 1,
-            "scope": "algorithm"
-        },
-        {
-            "name": "lora_r",
-            "type": "int",
-            "default": 8,
-            "min": 1,
-            "scope": "algorithm"
-        },
-        {
-            "name": "lora_alpha",
-            "type": "int",
-            "default": 32,
-            "min": 1,
-            "scope": "algorithm"
-        },
-        {
-            "name": "target_modules",
-            "type": "text",
-            "default": "q_proj,v_proj",
-            "scope": "algorithm"
-        },
-        {
-            "name": "lora_dropout",
-            "type": "float",
-            "default": 0.05,
-            "min": 0,
-            "max": 1,
-            "scope": "algorithm"
-        },
-        {
-            "name": "instruction_tuned",
-            "type": "text",
-            "default": "False",
-            "options": [
-                "True",
-                "False"
-            ],
-            "scope": "algorithm"
-        },
-        {
-            "name": "chat_dataset",
-            "type": "text",
-            "default": "True",
-            "options": [
-                "True",
-                "False"
-            ],
-            "scope": "algorithm"
-        },
-        {
-            "name": "add_input_output_demarcation_key",
-            "type": "text",
-            "default": "True",
-            "options": [
-                "True",
-                "False"
-            ],
-            "scope": "algorithm"
-        },
-        {
-            "name": "per_device_train_batch_size",
-            "type": "int",
-            "default": 1,
-            "min": 1,
-            "max": 1000,
-            "scope": "algorithm"
-        },
-        {
-            "name": "per_device_eval_batch_size",
-            "type": "int",
-            "default": 1,
-            "min": 1,
-            "max": 1000,
-            "scope": "algorithm"
-        },
-        {
-            "name": "max_train_samples",
-            "type": "int",
-            "default": -1,
-            "min": -1,
-            "scope": "algorithm"
-        },
-        {
-            "name": "max_val_samples",
-            "type": "int",
-            "default": -1,
-            "min": -1,
-            "scope": "algorithm"
-        },
-        {
-            "name": "seed",
-            "type": "int",
-            "default": 10,
-            "min": 1,
-            "max": 1000,
-            "scope": "algorithm"
-        },
-        {
-            "name": "max_input_length",
-            "type": "int",
-            "default": -1,
-            "min": -1,
-            "scope": "algorithm"
-        },
-        {
-            "name": "validation_split_ratio",
-            "type": "float",
-            "default": 0.2,
-            "min": 0,
-            "max": 1,
-            "scope": "algorithm"
-        },
-        {
-            "name": "train_data_split_seed",
-            "type": "int",
-            "default": 0,
-            "min": 0,
-            "scope": "algorithm"
-        },
-        {
-            "name": "preprocessing_num_workers",
-            "type": "text",
-            "default": "None",
-            "scope": "algorithm"
-        },
-        {
-            "name": "sagemaker_submit_directory",
-            "type": "text",
-            "default": "/opt/ml/input/data/code/sourcedir.tar.gz",
-            "scope": "container"
-        },
-        {
-            "name": "sagemaker_program",
-            "type": "text",
-            "default": "transfer_learning.py",
-            "scope": "container"
-        },
-        {
-            "name": "sagemaker_container_log_level",
-            "type": "text",
-            "default": "20",
-            "scope": "container"
-        }
-    ],
-    "training_script_key": "source-directory-tarballs/training/meta-textgeneration/v1.2.0/sourcedir.tar.gz",
-    "training_prepacked_script_key": "source-directory-tarballs/training/meta-textgeneration/prepack/inference-meta-textgeneration/v1.2.0/sourcedir.tar.gz",
-    "training_prepacked_script_version": "1.2.0",
-    "training_ecr_specs": {
-        "framework": "huggingface",
-        "framework_version": "2.0.0",
-        "py_version": "py310",
-        "huggingface_transformers_version": "4.28.1"
-    },
-    "training_artifact_key": "meta-training/v1.1.0/train-meta-textgeneration-llama-2-7b-f.tar.gz",
-    "inference_environment_variables": [
-        {
-            "name": "SAGEMAKER_PROGRAM",
-            "type": "text",
-            "default": "inference.py",
-            "scope": "container",
-            "required_for_model_class": true
-        },
-        {
-            "name": "SAGEMAKER_SUBMIT_DIRECTORY",
-            "type": "text",
-            "default": "/opt/ml/model/code",
-            "scope": "container",
-            "required_for_model_class": false
-        },
-        {
-            "name": "SAGEMAKER_CONTAINER_LOG_LEVEL",
-            "type": "text",
-            "default": "20",
-            "scope": "container",
-            "required_for_model_class": false
-        },
-        {
-            "name": "SAGEMAKER_MODEL_SERVER_TIMEOUT",
-            "type": "text",
-            "default": "3600",
-            "scope": "container",
-            "required_for_model_class": true
-        },
-        {
-            "name": "ENDPOINT_SERVER_TIMEOUT",
-            "type": "int",
-            "default": 3600,
-            "scope": "container",
-            "required_for_model_class": true
-        },
-        {
-            "name": "MODEL_CACHE_ROOT",
-            "type": "text",
-            "default": "/opt/ml/model",
-            "scope": "container",
-            "required_for_model_class": true
-        },
-        {
-            "name": "SAGEMAKER_ENV",
-            "type": "text",
-            "default": "1",
-            "scope": "container",
-            "required_for_model_class": true
-        },
-        {
-            "name": "HF_MODEL_ID",
-            "type": "text",
-            "default": "/opt/ml/model",
-            "scope": "container",
-            "required_for_model_class": true
-        },
-        {
-            "name": "OPTION_GPU_MEMORY_UTILIZATION",
-            "type": "text",
-            "default": "0.85",
-            "scope": "container",
-            "required_for_model_class": true
-        },
-        {
-            "name": "SM_NUM_GPUS",
-            "type": "text",
-            "default": "1",
-            "scope": "container",
-            "required_for_model_class": true
-        },
-        {
-            "name": "MAX_INPUT_LENGTH",
-            "type": "text",
-            "default": "4095",
-            "scope": "container",
-            "required_for_model_class": true
-        },
-        {
-            "name": "MAX_TOTAL_TOKENS",
-            "type": "text",
-            "default": "4096",
-            "scope": "container",
-            "required_for_model_class": true
-        },
-        {
-            "name": "MAX_BATCH_PREFILL_TOKENS",
-            "type": "text",
-            "default": "8192",
-            "scope": "container",
-            "required_for_model_class": true
-        },
-        {
-            "name": "MAX_CONCURRENT_REQUESTS",
-            "type": "text",
-            "default": "512",
-            "scope": "container",
-            "required_for_model_class": true
-        },
-        {
-            "name": "SAGEMAKER_MODEL_SERVER_WORKERS",
-            "type": "int",
-            "default": 1,
-            "scope": "container",
-            "required_for_model_class": true
-        }
-    ],
-    "metrics": [
-        {
-            "Name": "huggingface-textgeneration:eval-loss",
-            "Regex": "eval_epoch_loss=tensor\\(([0-9\\.]+)"
-        },
-        {
-            "Name": "huggingface-textgeneration:eval-ppl",
-            "Regex": "eval_ppl=tensor\\(([0-9\\.]+)"
-        },
-        {
-            "Name": "huggingface-textgeneration:train-loss",
-            "Regex": "train_epoch_loss=([0-9\\.]+)"
-        }
-    ],
-    "default_inference_instance_type": "ml.g5.12xlarge",
-    "supported_inference_instance_types": [
-        "ml.g5.12xlarge",
-        "ml.g5.24xlarge",
-        "ml.g5.2xlarge",
-        "ml.g5.48xlarge",
-        "ml.g5.4xlarge",
-        "ml.g5.8xlarge",
-        "ml.g6.12xlarge",
-        "ml.p4d.24xlarge"
-    ],
-    "default_training_instance_type": "ml.g5.12xlarge",
-    "supported_training_instance_types": [
-        "ml.g5.12xlarge",
-        "ml.g5.24xlarge",
-        "ml.g5.48xlarge",
-        "ml.p3dn.24xlarge",
-        "ml.g4dn.12xlarge"
-    ],
-    "model_kwargs": {},
-    "estimator_kwargs": {
-        "encrypt_inter_container_traffic": true,
-        "disable_output_compression": true,
-        "max_run": 360000
-    },
-    "fit_kwargs": {},
-    "inference_volume_size": 256,
-    "training_volume_size": 256,
-    "inference_enable_network_isolation": true,
-    "training_enable_network_isolation": true,
-    "default_training_dataset_key": "training-datasets/oasst_top/train/",
-    "validation_supported": true,
-    "fine_tuning_supported": true,
-    "resource_name_base": "meta-textgeneration-llama-2-7b-f",
-    "gated_bucket": true,
-    "training_instance_type_variants": {
-        "regional_aliases": {
-            "af-south-1": {
-                "gpu_ecr_uri_1": "626614931356.dkr.ecr.af-south-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "ap-east-1": {
-                "gpu_ecr_uri_1": "871362719292.dkr.ecr.ap-east-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "ap-northeast-1": {
-                "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "ap-northeast-2": {
-                "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "ap-northeast-3": {
-                "gpu_ecr_uri_1": "364406365360.dkr.ecr.ap-northeast-3.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "ap-south-1": {
-                "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-south-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "ap-southeast-1": {
-                "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "ap-southeast-2": {
-                "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "ap-southeast-3": {
-                "gpu_ecr_uri_1": "907027046896.dkr.ecr.ap-southeast-3.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "ca-central-1": {
-                "gpu_ecr_uri_1": "763104351884.dkr.ecr.ca-central-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "ca-west-1": {
-                "gpu_ecr_uri_1": "204538143572.dkr.ecr.ca-west-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "cn-north-1": {
-                "gpu_ecr_uri_1": "727897471807.dkr.ecr.cn-north-1.amazonaws.com.cn/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "cn-northwest-1": {
-                "gpu_ecr_uri_1": "727897471807.dkr.ecr.cn-northwest-1.amazonaws.com.cn/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "eu-central-1": {
-                "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-central-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "eu-north-1": {
-                "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-north-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "eu-south-1": {
-                "gpu_ecr_uri_1": "692866216735.dkr.ecr.eu-south-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "eu-west-1": {
-                "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-west-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "eu-west-2": {
-                "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "eu-west-3": {
-                "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-west-3.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "il-central-1": {
-                "gpu_ecr_uri_1": "780543022126.dkr.ecr.il-central-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "me-central-1": {
-                "gpu_ecr_uri_1": "914824155844.dkr.ecr.me-central-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "me-south-1": {
-                "gpu_ecr_uri_1": "217643126080.dkr.ecr.me-south-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "sa-east-1": {
-                "gpu_ecr_uri_1": "763104351884.dkr.ecr.sa-east-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "us-east-1": {
-                "gpu_ecr_uri_1": "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "us-east-2": {
-                "gpu_ecr_uri_1": "763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "us-gov-east-1": {
-                "gpu_ecr_uri_1": "446045086412.dkr.ecr.us-gov-east-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "us-gov-west-1": {
-                "gpu_ecr_uri_1": "442386744353.dkr.ecr.us-gov-west-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "us-west-1": {
-                "gpu_ecr_uri_1": "763104351884.dkr.ecr.us-west-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            },
-            "us-west-2": {
-                "gpu_ecr_uri_1": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04"
-            }
-        },
-        "variants": {
-            "g4dn": {
-                "regional_properties": {
-                    "image_uri": "$gpu_ecr_uri_1"
-                },
-                "properties": {
-                    "gated_model_key_env_var_value": "meta-training/g4dn/v1.0.0/train-meta-textgeneration-llama-2-7b-f.tar.gz"
-                }
-            },
-            "g5": {
-                "regional_properties": {
-                    "image_uri": "$gpu_ecr_uri_1"
-                },
-                "properties": {
-                    "gated_model_key_env_var_value": "meta-training/g5/v1.0.0/train-meta-textgeneration-llama-2-7b-f.tar.gz"
-                }
-            },
-            "g6": {
-                "regional_properties": {
-                    "image_uri": "$gpu_ecr_uri_1"
-                }
-            },
-            "g6e": {
-                "regional_properties": {
-                    "image_uri": "$gpu_ecr_uri_1"
-                }
-            },
-            "local_gpu": {
-                "regional_properties": {
-                    "image_uri": "$gpu_ecr_uri_1"
-                }
-            },
-            "p2": {
-                "regional_properties": {
-                    "image_uri": "$gpu_ecr_uri_1"
-                }
-            },
-            "p3": {
-                "regional_properties": {
-                    "image_uri": "$gpu_ecr_uri_1"
-                }
-            },
-            "p3dn": {
-                "regional_properties": {
-                    "image_uri": "$gpu_ecr_uri_1"
-                },
-                "properties": {
-                    "gated_model_key_env_var_value": "meta-training/p3dn/v1.0.0/train-meta-textgeneration-llama-2-7b-f.tar.gz"
-                }
-            },
-            "p4d": {
-                "regional_properties": {
-                    "image_uri": "$gpu_ecr_uri_1"
-                }
-            },
-            "p4de": {
-                "regional_properties": {
-                    "image_uri": "$gpu_ecr_uri_1"
-                }
-            },
-            "p5": {
-                "regional_properties": {
-                    "image_uri": "$gpu_ecr_uri_1"
-                }
-            },
-            "p5e": {
-                "regional_properties": {
-                    "image_uri": "$gpu_ecr_uri_1"
-                }
-            },
-            "p5en": {
-                "regional_properties": {
-                    "image_uri": "$gpu_ecr_uri_1"
-                }
-            },
-            "p6": {
-                "regional_properties": {
-                    "image_uri": "$gpu_ecr_uri_1"
-                }
-            },
-            "p6e": {
-                "regional_properties": {
-                    "image_uri": "$gpu_ecr_uri_1"
-                }
-            }
-        }
-    },
-    "hosting_artifact_s3_data_type": "S3Prefix",
-    "hosting_artifact_compression_type": "None",
-    "dynamic_container_deployment_supported": true,
-    "inference_configs": {
-        "tgi": {
-            "component_names": [
-                "tgi"
-            ]
-        },
-        "lmi": {
-            "component_names": [
-                "lmi"
-            ],
-            "benchmark_metrics": {
-                "ml.g6.12xlarge": [
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.19",
-                        "concurrency": "16"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "19.7",
-                        "concurrency": "16"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.22",
-                        "concurrency": "32"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "11.6",
-                        "concurrency": "32"
-                    }
-                ],
-                "ml.p4d.24xlarge": [
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "2.58",
-                        "concurrency": "256"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "3448.3",
-                        "concurrency": "256"
-                    }
-                ]
-            }
-        },
-        "lmi-optimized": {
-            "component_names": [
-                "lmi-optimized"
-            ],
-            "benchmark_metrics": {
-                "ml.g5.12xlarge": [
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.23",
-                        "concurrency": "1"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "156.2",
-                        "concurrency": "1"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.25",
-                        "concurrency": "2"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "93.1",
-                        "concurrency": "2"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.27",
-                        "concurrency": "4"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "58.2",
-                        "concurrency": "4"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.29",
-                        "concurrency": "8"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "31.0",
-                        "concurrency": "8"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.42",
-                        "concurrency": "16"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "15.2",
-                        "concurrency": "16"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.58",
-                        "concurrency": "32"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "8.0",
-                        "concurrency": "32"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "2.42",
-                        "concurrency": "128"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "4.6",
-                        "concurrency": "128"
-                    }
-                ],
-                "ml.g5.2xlarge": [
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.19",
-                        "concurrency": "1"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "66.9",
-                        "concurrency": "1"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.19",
-                        "concurrency": "2"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "55.5",
-                        "concurrency": "2"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.22",
-                        "concurrency": "4"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "41.8",
-                        "concurrency": "4"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.44",
-                        "concurrency": "8"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "31.3",
-                        "concurrency": "8"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "2.87",
-                        "concurrency": "16"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "71.1",
-                        "concurrency": "16"
-                    }
-                ],
-                "ml.g6.12xlarge": [
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.16",
-                        "concurrency": "1"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "107.1",
-                        "concurrency": "1"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.17",
-                        "concurrency": "2"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "79.5",
-                        "concurrency": "2"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.19",
-                        "concurrency": "4"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "55.1",
-                        "concurrency": "4"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.21",
-                        "concurrency": "8"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "34.4",
-                        "concurrency": "8"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "3.75",
-                        "concurrency": "64"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "8.3",
-                        "concurrency": "64"
-                    }
-                ],
-                "ml.g6.2xlarge": [
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.23",
-                        "concurrency": "1"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "38.2",
-                        "concurrency": "1"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.30",
-                        "concurrency": "2"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "32.9",
-                        "concurrency": "2"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.30",
-                        "concurrency": "4"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "24.5",
-                        "concurrency": "4"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.60",
-                        "concurrency": "8"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "21.0",
-                        "concurrency": "8"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "4.19",
-                        "concurrency": "16"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "50.0",
-                        "concurrency": "16"
-                    }
-                ],
-                "ml.p4d.24xlarge": [
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.06",
-                        "concurrency": "1"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "150.2",
-                        "concurrency": "1"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.06",
-                        "concurrency": "2"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "149.0",
-                        "concurrency": "2"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.06",
-                        "concurrency": "4"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "149.0",
-                        "concurrency": "4"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.06",
-                        "concurrency": "8"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "141.0",
-                        "concurrency": "8"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.06",
-                        "concurrency": "16"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "128.9",
-                        "concurrency": "16"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.06",
-                        "concurrency": "32"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "105.2",
-                        "concurrency": "32"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.07",
-                        "concurrency": "64"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "73.9",
-                        "concurrency": "64"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.37",
-                        "concurrency": "128"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "68.4",
-                        "concurrency": "128"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "4.58",
-                        "concurrency": "512"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "11111.1",
-                        "concurrency": "512"
-                    }
-                ],
-                "ml.p5.48xlarge": [
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.04",
-                        "concurrency": "1"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "253.2",
-                        "concurrency": "1"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.03",
-                        "concurrency": "2"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "257.1",
-                        "concurrency": "2"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.03",
-                        "concurrency": "4"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "252.5",
-                        "concurrency": "4"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.03",
-                        "concurrency": "8"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "236.4",
-                        "concurrency": "8"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.04",
-                        "concurrency": "16"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "213.2",
-                        "concurrency": "16"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.04",
-                        "concurrency": "32"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "178.6",
-                        "concurrency": "32"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.04",
-                        "concurrency": "64"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "129.0",
-                        "concurrency": "64"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.04",
-                        "concurrency": "128"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "81.2",
-                        "concurrency": "128"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "0.33",
-                        "concurrency": "256"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "74.5",
-                        "concurrency": "256"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "1.77",
-                        "concurrency": "512"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "724.6",
-                        "concurrency": "512"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "2.96",
-                        "concurrency": "768"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "6666.7",
-                        "concurrency": "768"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "2.22",
-                        "concurrency": "1024"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "5882.4",
-                        "concurrency": "1024"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "3.88",
-                        "concurrency": "1280"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "11111.1",
-                        "concurrency": "1280"
-                    },
-                    {
-                        "name": "latency",
-                        "unit": "sec",
-                        "value": "3.99",
-                        "concurrency": "1536"
-                    },
-                    {
-                        "name": "throughput",
-                        "unit": "tokens/sec",
-                        "value": "11111.1",
-                        "concurrency": "1536"
-                    }
-                ]
-            },
-            "acceleration_configs": [
-                {
-                    "type": "Compilation",
-                    "enabled": false
-                },
-                {
-                    "type": "Speculative-Decoding",
-                    "enabled": true
-                },
-                {
-                    "type": "Quantization",
-                    "enabled": false
-                }
-            ]
-        },
-        "neuron": {
-            "component_names": [
-                "neuron"
-            ]
-        }
-    },
-    "inference_config_components": {
-        "tgi": {
-            "hosting_ecr_specs": {
-                "framework": "huggingface-llm",
-                "framework_version": "2.0.0",
-                "py_version": "py310"
-            },
-            "hosting_script_key": "source-directory-tarballs/meta/inference/textgeneration/v1.2.3/sourcedir.tar.gz",
-            "hosting_use_script_uri": false,
-            "inference_dependencies": [],
-            "inference_vulnerable": false,
-            "inference_vulnerabilities": [],
-            "hosting_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/inference/v1.1.0/",
-            "hosting_prepacked_artifact_version": "1.1.0",
-            "hosting_prepacked_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/inference-prepack/v1.1.0/",
-            "hosting_artifact_s3_data_type": "S3Prefix",
-            "hosting_artifact_compression_type": "None",
-            "hosting_neuron_model_id": "meta-textgenerationneuron-llama-2-7b-f",
-            "hosting_neuron_model_version": "1.0.0",
-            "model_kwargs": {},
-            "deploy_kwargs": {
-                "model_data_download_timeout": 1200,
-                "container_startup_health_check_timeout": 1200
-            },
-            "predictor_specs": {
-                "supported_content_types": [
-                    "application/json"
-                ],
-                "supported_accept_types": [
-                    "application/json"
-                ],
-                "default_content_type": "application/json",
-                "default_accept_type": "application/json"
-            },
-            "default_inference_instance_type": "ml.g5.12xlarge",
-            "supported_inference_instance_types": [
-                "ml.g5.12xlarge",
-                "ml.g5.24xlarge",
-                "ml.g5.2xlarge",
-                "ml.g5.48xlarge",
-                "ml.g5.4xlarge",
-                "ml.g5.8xlarge",
-                "ml.g6.12xlarge",
-                "ml.p4d.24xlarge"
-            ],
-            "hosting_instance_type_variants": {
-                "regional_aliases": {
-                    "af-south-1": {
-                        "gpu_ecr_uri_1": "626614931356.dkr.ecr.af-south-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "ap-east-1": {
-                        "gpu_ecr_uri_1": "871362719292.dkr.ecr.ap-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "ap-east-2": {
-                        "gpu_ecr_uri_1": "975050140332.dkr.ecr.ap-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "ap-northeast-1": {
-                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "ap-northeast-2": {
-                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "ap-northeast-3": {
-                        "gpu_ecr_uri_1": "364406365360.dkr.ecr.ap-northeast-3.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "ap-south-1": {
-                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-south-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "ap-south-2": {
-                        "gpu_ecr_uri_1": "772153158452.dkr.ecr.ap-south-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "ap-southeast-1": {
-                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "ap-southeast-2": {
-                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "ap-southeast-3": {
-                        "gpu_ecr_uri_1": "907027046896.dkr.ecr.ap-southeast-3.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "ap-southeast-4": {
-                        "gpu_ecr_uri_1": "457447274322.dkr.ecr.ap-southeast-4.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "ap-southeast-5": {
-                        "gpu_ecr_uri_1": "550225433462.dkr.ecr.ap-southeast-5.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "ap-southeast-7": {
-                        "gpu_ecr_uri_1": "590183813437.dkr.ecr.ap-southeast-7.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "ca-central-1": {
-                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.ca-central-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "ca-west-1": {
-                        "gpu_ecr_uri_1": "204538143572.dkr.ecr.ca-west-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "cn-north-1": {
-                        "gpu_ecr_uri_1": "727897471807.dkr.ecr.cn-north-1.amazonaws.com.cn/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "cn-northwest-1": {
-                        "gpu_ecr_uri_1": "727897471807.dkr.ecr.cn-northwest-1.amazonaws.com.cn/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "eu-central-1": {
-                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-central-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "eu-central-2": {
-                        "gpu_ecr_uri_1": "380420809688.dkr.ecr.eu-central-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "eu-north-1": {
-                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-north-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "eu-south-1": {
-                        "gpu_ecr_uri_1": "692866216735.dkr.ecr.eu-south-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "eu-south-2": {
-                        "gpu_ecr_uri_1": "503227376785.dkr.ecr.eu-south-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "eu-west-1": {
-                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-west-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "eu-west-2": {
-                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "eu-west-3": {
-                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.eu-west-3.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "il-central-1": {
-                        "gpu_ecr_uri_1": "780543022126.dkr.ecr.il-central-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "me-central-1": {
-                        "gpu_ecr_uri_1": "914824155844.dkr.ecr.me-central-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "me-south-1": {
-                        "gpu_ecr_uri_1": "217643126080.dkr.ecr.me-south-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "mx-central-1": {
-                        "gpu_ecr_uri_1": "637423239942.dkr.ecr.mx-central-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "sa-east-1": {
-                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.sa-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "us-east-1": {
-                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "us-east-2": {
-                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "us-gov-east-1": {
-                        "gpu_ecr_uri_1": "446045086412.dkr.ecr.us-gov-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "us-gov-west-1": {
-                        "gpu_ecr_uri_1": "442386744353.dkr.ecr.us-gov-west-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "us-west-1": {
-                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.us-west-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    },
-                    "us-west-2": {
-                        "gpu_ecr_uri_1": "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi2.0.0-gpu-py310-cu121-ubuntu22.04"
-                    }
-                },
-                "variants": {
-                    "g4dn": {
-                        "regional_properties": {
-                            "image_uri": "$gpu_ecr_uri_1"
-                        }
-                    },
-                    "g5": {
-                        "regional_properties": {
-                            "image_uri": "$gpu_ecr_uri_1"
-                        }
-                    },
-                    "g6": {
-                        "regional_properties": {
-                            "image_uri": "$gpu_ecr_uri_1"
-                        }
-                    },
-                    "g6e": {
-                        "regional_properties": {
-                            "image_uri": "$gpu_ecr_uri_1"
-                        }
-                    },
-                    "local_gpu": {
-                        "regional_properties": {
-                            "image_uri": "$gpu_ecr_uri_1"
-                        }
-                    },
-                    "p2": {
-                        "regional_properties": {
-                            "image_uri": "$gpu_ecr_uri_1"
-                        }
-                    },
-                    "p3": {
-                        "regional_properties": {
-                            "image_uri": "$gpu_ecr_uri_1"
-                        }
-                    },
-                    "p3dn": {
-                        "regional_properties": {
-                            "image_uri": "$gpu_ecr_uri_1"
-                        }
-                    },
-                    "p4d": {
-                        "regional_properties": {
-                            "image_uri": "$gpu_ecr_uri_1"
-                        }
-                    },
-                    "p4de": {
-                        "regional_properties": {
-                            "image_uri": "$gpu_ecr_uri_1"
-                        }
-                    },
-                    "p5": {
-                        "regional_properties": {
-                            "image_uri": "$gpu_ecr_uri_1"
-                        }
-                    },
-                    "p5e": {
-                        "regional_properties": {
-                            "image_uri": "$gpu_ecr_uri_1"
-                        }
-                    },
-                    "p5en": {
-                        "regional_properties": {
-                            "image_uri": "$gpu_ecr_uri_1"
-                        }
-                    },
-                    "p6": {
-                        "regional_properties": {
-                            "image_uri": "$gpu_ecr_uri_1"
-                        }
-                    },
-                    "p6e": {
-                        "regional_properties": {
-                            "image_uri": "$gpu_ecr_uri_1"
-                        }
-                    },
-                    "ml.g5.12xlarge": {
-                        "properties": {
-                            "environment_variables": {
-                                "SM_NUM_GPUS": "4",
-                                "MAX_BATCH_PREFILL_TOKENS": "16384"
-                            },
-                            "resource_requirements": {
-                                "min_memory_mb": 98304,
-                                "num_accelerators": 4
-                            }
-                        }
-                    },
-                    "ml.g5.24xlarge": {
-                        "properties": {
-                            "environment_variables": {
-                                "SM_NUM_GPUS": "4"
-                            },
-                            "resource_requirements": {
-                                "min_memory_mb": 196608,
-                                "num_accelerators": 4
-                            }
-                        }
-                    },
-                    "ml.g5.48xlarge": {
-                        "properties": {
-                            "environment_variables": {
-                                "SM_NUM_GPUS": "8"
-                            },
-                            "resource_requirements": {
-                                "min_memory_mb": 393216,
-                                "num_accelerators": 8
-                            }
-                        }
-                    },
-                    "ml.p4d.24xlarge": {
-                        "properties": {
-                            "environment_variables": {
-                                "SM_NUM_GPUS": "8",
-                                "MAX_BATCH_PREFILL_TOKENS": "16384"
-                            },
-                            "resource_requirements": {
-                                "min_memory_mb": 589824,
-                                "num_accelerators": 8
-                            }
-                        }
-                    },
-                    "ml.p5.48xlarge": {
-                        "properties": {
-                            "environment_variables": {
-                                "OPTION_GPU_MEMORY_UTILIZATION": "0.95"
-                            }
-                        }
-                    },
-                    "ml.g5.2xlarge": {
-                        "properties": {
-                            "resource_requirements": {
-                                "min_memory_mb": 16384,
-                                "num_accelerators": 1
-                            }
-                        }
-                    },
-                    "ml.g5.4xlarge": {
-                        "properties": {
-                            "resource_requirements": {
-                                "min_memory_mb": 32768,
-                                "num_accelerators": 1
-                            }
-                        }
-                    },
-                    "ml.g5.8xlarge": {
-                        "properties": {
-                            "resource_requirements": {
-                                "min_memory_mb": 65536,
-                                "num_accelerators": 1
-                            }
-                        }
-                    }
-                }
-            },
-            "inference_volume_size": 256,
-            "inference_enable_network_isolation": true,
-            "hosting_resource_requirements": {
-                "min_memory_mb": 98304,
-                "num_accelerators": 4
-            },
-            "inference_environment_variables": [
-                {
-                    "name": "SAGEMAKER_PROGRAM",
-                    "type": "text",
-                    "default": "inference.py",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "SAGEMAKER_SUBMIT_DIRECTORY",
-                    "type": "text",
-                    "default": "/opt/ml/model/code",
-                    "scope": "container",
-                    "required_for_model_class": false
-                },
-                {
-                    "name": "SAGEMAKER_CONTAINER_LOG_LEVEL",
-                    "type": "text",
-                    "default": "20",
-                    "scope": "container",
-                    "required_for_model_class": false
-                },
-                {
-                    "name": "SAGEMAKER_MODEL_SERVER_TIMEOUT",
-                    "type": "text",
-                    "default": "3600",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "ENDPOINT_SERVER_TIMEOUT",
-                    "type": "int",
-                    "default": 3600,
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "MODEL_CACHE_ROOT",
-                    "type": "text",
-                    "default": "/opt/ml/model",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "SAGEMAKER_ENV",
-                    "type": "text",
-                    "default": "1",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "HF_MODEL_ID",
-                    "type": "text",
-                    "default": "/opt/ml/model",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "OPTION_GPU_MEMORY_UTILIZATION",
-                    "type": "text",
-                    "default": "0.85",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "SM_NUM_GPUS",
-                    "type": "text",
-                    "default": "1",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "MAX_INPUT_LENGTH",
-                    "type": "text",
-                    "default": "4095",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "MAX_TOTAL_TOKENS",
-                    "type": "text",
-                    "default": "4096",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "MAX_BATCH_PREFILL_TOKENS",
-                    "type": "text",
-                    "default": "8192",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "MAX_CONCURRENT_REQUESTS",
-                    "type": "text",
-                    "default": "512",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "SAGEMAKER_MODEL_SERVER_WORKERS",
-                    "type": "int",
-                    "default": 1,
-                    "scope": "container",
-                    "required_for_model_class": true
-                }
-            ],
-            "default_payloads": {
-                "pingExponentialBackoff": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "[0].generated_text",
-                        "input_logprobs": "[0].details.prefill[*].logprob"
-                    },
-                    "body": {
-                        "inputs": "import socket\n\ndef ping_exponential_backoff(host: str):",
-                        "parameters": {
-                            "max_new_tokens": 256,
-                            "top_p": 0.9,
-                            "temperature": 0.2,
-                            "decoder_input_details": true,
-                            "details": true
-                        }
-                    }
-                },
-                "argparse": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "[0].generated_text"
-                    },
-                    "body": {
-                        "inputs": "import argparse\n\ndef main(string: str):\n    print(string)\n    print(string[::-1])\n\nif __name__ == \"__main__\":",
-                        "parameters": {
-                            "max_new_tokens": 256,
-                            "top_p": 0.9,
-                            "temperature": 0.05
-                        }
-                    }
-                },
-                "Fibonacci": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "[0].generated_text",
-                        "input_logprobs": "[0].details.prefill[*].logprob"
-                    },
-                    "body": {
-                        "inputs": "def fib(n):\n",
-                        "parameters": {
-                            "max_new_tokens": 64,
-                            "top_p": 0.9,
-                            "temperature": 0.2,
-                            "decoder_input_details": true,
-                            "details": true
-                        }
-                    }
-                },
-                "removeNonAscii": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "[0].generated_text",
-                        "input_logprobs": "[0].details.prefill[*].logprob"
-                    },
-                    "body": {
-                        "inputs": "def remove_non_ascii(s: str) -> str:\n    \"\"\"<FILL>\n    return result\n",
-                        "parameters": {
-                            "max_new_tokens": 256,
-                            "top_p": 0.9,
-                            "temperature": 0.05,
-                            "decoder_input_details": true,
-                            "details": true
-                        }
-                    }
-                },
-                "installationInstructions": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "[0].generated_text"
-                    },
-                    "body": {
-                        "inputs": "# Installation instructions:\n    ```bash\n<FILL>\n    ```\nThis downloads the LLaMA inference code and installs the repository as a local pip package.\n",
-                        "parameters": {
-                            "max_new_tokens": 256,
-                            "top_p": 0.9,
-                            "temperature": 0.05
-                        }
-                    }
-                },
-                "interfaceManager": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "[0].generated_text"
-                    },
-                    "body": {
-                        "inputs": "class InterfaceManagerFactory(AbstractManagerFactory):\n    def __init__(<FILL>\ndef main():\n    factory = InterfaceManagerFactory(start=datetime.now())\n    managers = []\n    for i in range(10):\n        managers.append(factory.build(id=i))\n",
-                        "parameters": {
-                            "max_new_tokens": 256,
-                            "top_p": 0.9,
-                            "temperature": 0.05
-                        }
-                    }
-                },
-                "quasiPrefunctoid": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "[0].generated_text"
-                    },
-                    "body": {
-                        "inputs": "/-- A quasi-prefunctoid is 1-connected iff all its etalisations are 1-connected. -/\ntheorem connected_iff_etalisation [C D : precategoroid] (P : quasi_prefunctoid C D) :\n  π₁ P = 0 ↔ <FILL> = 0 :=\nbegin\n  split,\n  { intros h f,\n    rw pi_1_etalisation at h,\n    simp [h],\n    refl\n  },\n  { intro h,\n    have := @quasi_adjoint C D P,\n    simp [←pi_1_etalisation, this, h],\n    refl\n  }\nend\n",
-                        "parameters": {
-                            "max_new_tokens": 256,
-                            "top_p": 0.9,
-                            "temperature": 0.05
-                        }
-                    }
-                },
-                "bashListTextFiles": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "[0].generated_text",
-                        "input_logprobs": "[0].details.prefill[*].logprob"
-                    },
-                    "body": {
-                        "inputs": "<s>[INST] In Bash, how do I list all text files in the current directory (excluding subdirectories) that have been modified in the last month? [/INST] ",
-                        "parameters": {
-                            "max_new_tokens": 256,
-                            "top_p": 0.9,
-                            "temperature": 0.05,
-                            "decoder_input_details": true,
-                            "details": true
-                        }
-                    }
-                },
-                "inorderPreorderTraversal": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "[0].generated_text"
-                    },
-                    "body": {
-                        "inputs": "<s>[INST] What is the difference between inorder and preorder traversal? Give an example in Python. [/INST] ",
-                        "parameters": {
-                            "max_new_tokens": 256,
-                            "top_p": 0.9,
-                            "temperature": 0.05
-                        }
-                    }
-                },
-                "contiguousSublists": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "[0].generated_text"
-                    },
-                    "body": {
-                        "inputs": "<s>[INST] <<SYS>>\nProvide answers in JavaScript\n<</SYS>>\n\nWrite a function that computes the set of sums of all contiguous sublists of a given list. [/INST] ",
-                        "parameters": {
-                            "max_new_tokens": 256,
-                            "top_p": 0.9,
-                            "temperature": 0.05
-                        }
-                    }
-                }
-            }
-        },
-        "lmi": {
-            "hosting_ecr_specs": {
-                "framework": "djl-deepspeed",
-                "framework_version": "0.27.0",
-                "py_version": "py310"
-            },
-            "hosting_script_key": "source-directory-tarballs/meta/inference/textgeneration/v1.2.3/sourcedir.tar.gz",
-            "hosting_use_script_uri": false,
-            "inference_dependencies": [],
-            "inference_vulnerable": false,
-            "inference_vulnerabilities": [],
-            "hosting_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/inference/v1.1.0/",
-            "hosting_prepacked_artifact_version": "1.1.0",
-            "hosting_prepacked_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/inference-prepack/v1.1.0/",
-            "hosting_artifact_s3_data_type": "S3Prefix",
-            "hosting_artifact_compression_type": "None",
-            "hosting_neuron_model_id": "meta-textgenerationneuron-llama-2-7b-f",
-            "hosting_neuron_model_version": "1.0.0",
-            "model_kwargs": {},
-            "deploy_kwargs": {
-                "model_data_download_timeout": 1200,
-                "container_startup_health_check_timeout": 1200
-            },
-            "predictor_specs": {
-                "supported_content_types": [
-                    "application/json"
-                ],
-                "supported_accept_types": [
-                    "application/json"
-                ],
-                "default_content_type": "application/json",
-                "default_accept_type": "application/json"
-            },
-            "default_inference_instance_type": "ml.g5.12xlarge",
-            "supported_inference_instance_types": [
-                "ml.g5.12xlarge",
-                "ml.g5.24xlarge",
-                "ml.g5.2xlarge",
-                "ml.g5.48xlarge",
-                "ml.g5.4xlarge",
-                "ml.g5.8xlarge",
-                "ml.g6.12xlarge",
-                "ml.p4d.24xlarge"
-            ],
-            "hosting_instance_type_variants": {
-                "regional_aliases": {
-                    "af-south-1": {
-                        "alias_ecr_uri_1": "626614931356.dkr.ecr.af-south-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "ap-east-1": {
-                        "alias_ecr_uri_1": "871362719292.dkr.ecr.ap-east-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "ap-northeast-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "ap-northeast-2": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-2.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "ap-northeast-3": {
-                        "alias_ecr_uri_1": "364406365360.dkr.ecr.ap-northeast-3.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "ap-south-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-south-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "ap-southeast-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "ap-southeast-2": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-2.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "ap-southeast-3": {
-                        "alias_ecr_uri_1": "907027046896.dkr.ecr.ap-southeast-3.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "ca-central-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ca-central-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "ca-west-1": {
-                        "alias_ecr_uri_1": "204538143572.dkr.ecr.ca-west-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "cn-north-1": {
-                        "alias_ecr_uri_1": "727897471807.dkr.ecr.cn-north-1.amazonaws.com.cn/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "cn-northwest-1": {
-                        "alias_ecr_uri_1": "727897471807.dkr.ecr.cn-northwest-1.amazonaws.com.cn/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "eu-central-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-central-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "eu-north-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-north-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "eu-south-1": {
-                        "alias_ecr_uri_1": "692866216735.dkr.ecr.eu-south-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "eu-west-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-west-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "eu-west-2": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-west-2.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "eu-west-3": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-west-3.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "il-central-1": {
-                        "alias_ecr_uri_1": "780543022126.dkr.ecr.il-central-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "me-south-1": {
-                        "alias_ecr_uri_1": "217643126080.dkr.ecr.me-south-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "sa-east-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.sa-east-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "us-east-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "us-east-2": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-east-2.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "us-gov-east-1": {
-                        "alias_ecr_uri_1": "446045086412.dkr.ecr.us-gov-east-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "us-gov-west-1": {
-                        "alias_ecr_uri_1": "442386744353.dkr.ecr.us-gov-west-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "us-west-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-west-1.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    },
-                    "us-west-2": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121"
-                    }
-                },
-                "variants": {
-                    "g4dn": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "g5": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "g6": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "g6e": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "local_gpu": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p2": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p3": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p3dn": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p4d": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p4de": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p5": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p5e": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p5en": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p6": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p6e": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "ml.p4d.24xlarge": {
-                        "properties": {
-                            "environment_variables": {
-                                "OPTION_TENSOR_PARALLEL_DEGREE": "1"
-                            },
-                            "resource_requirements": {
-                                "min_memory_mb": 589824,
-                                "num_accelerators": 8
-                            }
-                        }
-                    },
-                    "ml.p5.48xlarge": {
-                        "properties": {
-                            "environment_variables": {
-                                "OPTION_TENSOR_PARALLEL_DEGREE": "1",
-                                "OPTION_GPU_MEMORY_UTILIZATION": "0.95"
-                            }
-                        }
-                    },
-                    "ml.g5.2xlarge": {
-                        "properties": {
-                            "resource_requirements": {
-                                "min_memory_mb": 16384,
-                                "num_accelerators": 1
-                            }
-                        }
-                    },
-                    "ml.g5.4xlarge": {
-                        "properties": {
-                            "resource_requirements": {
-                                "min_memory_mb": 32768,
-                                "num_accelerators": 1
-                            }
-                        }
-                    },
-                    "ml.g5.8xlarge": {
-                        "properties": {
-                            "resource_requirements": {
-                                "min_memory_mb": 65536,
-                                "num_accelerators": 1
-                            }
-                        }
-                    },
-                    "ml.g5.12xlarge": {
-                        "properties": {
-                            "resource_requirements": {
-                                "min_memory_mb": 98304,
-                                "num_accelerators": 4
-                            }
-                        }
-                    },
-                    "ml.g5.24xlarge": {
-                        "properties": {
-                            "resource_requirements": {
-                                "min_memory_mb": 196608,
-                                "num_accelerators": 4
-                            }
-                        }
-                    },
-                    "ml.g5.48xlarge": {
-                        "properties": {
-                            "resource_requirements": {
-                                "min_memory_mb": 393216,
-                                "num_accelerators": 8
-                            }
-                        }
-                    }
-                }
-            },
-            "inference_volume_size": 256,
-            "inference_enable_network_isolation": true,
-            "hosting_resource_requirements": {
-                "min_memory_mb": 98304,
-                "num_accelerators": 4
-            },
-            "inference_environment_variables": [
-                {
-                    "name": "SAGEMAKER_PROGRAM",
-                    "type": "text",
-                    "default": "inference.py",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "SAGEMAKER_SUBMIT_DIRECTORY",
-                    "type": "text",
-                    "default": "/opt/ml/model/code",
-                    "scope": "container",
-                    "required_for_model_class": false
-                },
-                {
-                    "name": "SAGEMAKER_CONTAINER_LOG_LEVEL",
-                    "type": "text",
-                    "default": "20",
-                    "scope": "container",
-                    "required_for_model_class": false
-                },
-                {
-                    "name": "SAGEMAKER_MODEL_SERVER_TIMEOUT",
-                    "type": "text",
-                    "default": "3600",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "ENDPOINT_SERVER_TIMEOUT",
-                    "type": "int",
-                    "default": 3600,
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "MODEL_CACHE_ROOT",
-                    "type": "text",
-                    "default": "/opt/ml/model",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "SAGEMAKER_ENV",
-                    "type": "text",
-                    "default": "1",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "HF_MODEL_ID",
-                    "type": "text",
-                    "default": "/opt/ml/model",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "OPTION_GPU_MEMORY_UTILIZATION",
-                    "type": "text",
-                    "default": "0.85",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "SAGEMAKER_MODEL_SERVER_WORKERS",
-                    "type": "int",
-                    "default": 1,
-                    "scope": "container",
-                    "required_for_model_class": true
-                }
-            ],
-            "default_payloads": {
-                "meaningOfLife": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "generated_text"
-                    },
-                    "body": {
-                        "inputs": "I believe the meaning of life is",
-                        "parameters": {
-                            "max_new_tokens": 64,
-                            "top_p": 0.9,
-                            "temperature": 0.6,
-                            "decoder_input_details": true,
-                            "details": true
-                        }
-                    }
-                },
-                "theoryOfRelativity": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "generated_text"
-                    },
-                    "body": {
-                        "inputs": "Simply put, the theory of relativity states that ",
-                        "parameters": {
-                            "max_new_tokens": 64,
-                            "top_p": 0.9,
-                            "temperature": 0.6
-                        }
-                    }
-                },
-                "teamMessage": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "generated_text"
-                    },
-                    "body": {
-                        "inputs": "A brief message congratulating the team on the launch:\n\nHi everyone,\n\nI just ",
-                        "parameters": {
-                            "max_new_tokens": 64,
-                            "top_p": 0.9,
-                            "temperature": 0.6
-                        }
-                    }
-                },
-                "englishToFrench": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "generated_text"
-                    },
-                    "body": {
-                        "inputs": "Translate English to French:\nsea otter => loutre de mer\npeppermint => menthe poivrée\nplush girafe => girafe peluche\ncheese =>",
-                        "parameters": {
-                            "max_new_tokens": 64,
-                            "top_p": 0.9,
-                            "temperature": 0.6
-                        }
-                    }
-                }
-            }
-        },
-        "lmi-optimized": {
-            "hosting_ecr_specs": {
-                "framework": "djl-lmi",
-                "framework_version": "0.28.0",
-                "py_version": "py310"
-            },
-            "hosting_script_key": "source-directory-tarballs/meta/inference/textgeneration/v1.2.3/sourcedir.tar.gz",
-            "hosting_use_script_uri": false,
-            "inference_dependencies": [],
-            "inference_vulnerable": false,
-            "inference_vulnerabilities": [],
-            "hosting_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/inference/v1.1.0/",
-            "hosting_prepacked_artifact_version": "1.1.0",
-            "hosting_prepacked_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/inference-prepack/v1.1.0/",
-            "hosting_artifact_s3_data_type": "S3Prefix",
-            "hosting_artifact_compression_type": "None",
-            "hosting_additional_data_sources": {
-                "speculative_decoding": [
-                    {
-                        "channel_name": "draft_model",
-                        "artifact_version": "v2",
-                        "s3_data_source": {
-                            "compression_type": "None",
-                            "s3_data_type": "S3Prefix",
-                            "s3_uri": "sagemaker-speculative-decoding-llama2-tiny-v2/"
-                        }
-                    }
-                ]
-            },
-            "hosting_neuron_model_id": "meta-textgenerationneuron-llama-2-7b-f",
-            "hosting_neuron_model_version": "1.0.0",
-            "model_kwargs": {},
-            "deploy_kwargs": {
-                "model_data_download_timeout": 1200,
-                "container_startup_health_check_timeout": 1200
-            },
-            "predictor_specs": {
-                "supported_content_types": [
-                    "application/json"
-                ],
-                "supported_accept_types": [
-                    "application/json"
-                ],
-                "default_content_type": "application/json",
-                "default_accept_type": "application/json"
-            },
-            "default_inference_instance_type": "ml.p4d.24xlarge",
-            "supported_inference_instance_types": [
-                "ml.g5.12xlarge",
-                "ml.g5.2xlarge",
-                "ml.g6.12xlarge",
-                "ml.g6.2xlarge",
-                "ml.p4d.24xlarge",
-                "ml.p4de.24xlarge",
-                "ml.p5.48xlarge"
-            ],
-            "hosting_instance_type_variants": {
-                "regional_aliases": {
-                    "af-south-1": {
-                        "alias_ecr_uri_1": "626614931356.dkr.ecr.af-south-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "ap-east-1": {
-                        "alias_ecr_uri_1": "871362719292.dkr.ecr.ap-east-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "ap-northeast-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "ap-northeast-2": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-2.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "ap-northeast-3": {
-                        "alias_ecr_uri_1": "364406365360.dkr.ecr.ap-northeast-3.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "ap-south-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-south-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "ap-southeast-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "ap-southeast-2": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-2.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "ap-southeast-3": {
-                        "alias_ecr_uri_1": "907027046896.dkr.ecr.ap-southeast-3.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "ca-central-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ca-central-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "ca-west-1": {
-                        "alias_ecr_uri_1": "204538143572.dkr.ecr.ca-west-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "cn-north-1": {
-                        "alias_ecr_uri_1": "727897471807.dkr.ecr.cn-north-1.amazonaws.com.cn/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "cn-northwest-1": {
-                        "alias_ecr_uri_1": "727897471807.dkr.ecr.cn-northwest-1.amazonaws.com.cn/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "eu-central-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-central-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "eu-north-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-north-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "eu-south-1": {
-                        "alias_ecr_uri_1": "692866216735.dkr.ecr.eu-south-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "eu-west-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-west-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "eu-west-2": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-west-2.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "eu-west-3": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-west-3.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "il-central-1": {
-                        "alias_ecr_uri_1": "780543022126.dkr.ecr.il-central-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "me-central-1": {
-                        "alias_ecr_uri_1": "914824155844.dkr.ecr.me-central-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "me-south-1": {
-                        "alias_ecr_uri_1": "217643126080.dkr.ecr.me-south-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "sa-east-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.sa-east-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "us-east-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "us-east-2": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-east-2.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "us-gov-east-1": {
-                        "alias_ecr_uri_1": "446045086412.dkr.ecr.us-gov-east-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "us-gov-west-1": {
-                        "alias_ecr_uri_1": "442386744353.dkr.ecr.us-gov-west-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "us-west-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-west-1.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    },
-                    "us-west-2": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124"
-                    }
-                },
-                "variants": {
-                    "g4dn": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "g5": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "g6": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "g6e": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "local_gpu": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p2": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p3": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p3dn": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p4d": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p4de": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p5": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p5e": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p5en": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p6": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "p6e": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "ml.p4d.24xlarge": {
-                        "properties": {
-                            "environment_variables": {
-                                "OPTION_TENSOR_PARALLEL_DEGREE": "1"
-                            },
-                            "resource_requirements": {
-                                "min_memory_mb": 589824,
-                                "num_accelerators": 8
-                            }
-                        }
-                    },
-                    "ml.p5.48xlarge": {
-                        "properties": {
-                            "environment_variables": {
-                                "OPTION_TENSOR_PARALLEL_DEGREE": "1",
-                                "OPTION_GPU_MEMORY_UTILIZATION": "0.95"
-                            }
-                        }
-                    },
-                    "ml.p4de.24xlarge": {
-                        "properties": {
-                            "resource_requirements": {
-                                "min_memory_mb": 589824,
-                                "num_accelerators": 8
-                            }
-                        }
-                    }
-                }
-            },
-            "inference_volume_size": 256,
-            "inference_enable_network_isolation": true,
-            "hosting_resource_requirements": {
-                "min_memory_mb": 589824,
-                "num_accelerators": 8
-            },
-            "inference_environment_variables": [
-                {
-                    "name": "SAGEMAKER_PROGRAM",
-                    "type": "text",
-                    "default": "inference.py",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "SAGEMAKER_SUBMIT_DIRECTORY",
-                    "type": "text",
-                    "default": "/opt/ml/model/code",
-                    "scope": "container",
-                    "required_for_model_class": false
-                },
-                {
-                    "name": "SAGEMAKER_CONTAINER_LOG_LEVEL",
-                    "type": "text",
-                    "default": "20",
-                    "scope": "container",
-                    "required_for_model_class": false
-                },
-                {
-                    "name": "SAGEMAKER_MODEL_SERVER_TIMEOUT",
-                    "type": "text",
-                    "default": "3600",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "ENDPOINT_SERVER_TIMEOUT",
-                    "type": "int",
-                    "default": 3600,
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "MODEL_CACHE_ROOT",
-                    "type": "text",
-                    "default": "/opt/ml/model",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "SAGEMAKER_ENV",
-                    "type": "text",
-                    "default": "1",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "HF_MODEL_ID",
-                    "type": "text",
-                    "default": "/opt/ml/model",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "OPTION_SPECULATIVE_DRAFT_MODEL",
-                    "type": "text",
-                    "default": "/opt/ml/additional-model-data-sources/draft_model",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "OPTION_GPU_MEMORY_UTILIZATION",
-                    "type": "text",
-                    "default": "0.85",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "SAGEMAKER_MODEL_SERVER_WORKERS",
-                    "type": "int",
-                    "default": 1,
-                    "scope": "container",
-                    "required_for_model_class": true
-                }
-            ],
-            "default_payloads": {
-                "meaningOfLife": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "generated_text"
-                    },
-                    "body": {
-                        "inputs": "I believe the meaning of life is",
-                        "parameters": {
-                            "max_new_tokens": 64,
-                            "top_p": 0.9,
-                            "temperature": 0.6
-                        }
-                    }
-                },
-                "theoryOfRelativity": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "generated_text"
-                    },
-                    "body": {
-                        "inputs": "Simply put, the theory of relativity states that ",
-                        "parameters": {
-                            "max_new_tokens": 64,
-                            "top_p": 0.9,
-                            "temperature": 0.6
-                        }
-                    }
-                },
-                "teamMessage": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "generated_text"
-                    },
-                    "body": {
-                        "inputs": "A brief message congratulating the team on the launch:\n\nHi everyone,\n\nI just ",
-                        "parameters": {
-                            "max_new_tokens": 64,
-                            "top_p": 0.9,
-                            "temperature": 0.6
-                        }
-                    }
-                },
-                "englishToFrench": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "generated_text"
-                    },
-                    "body": {
-                        "inputs": "Translate English to French:\nsea otter => loutre de mer\npeppermint => menthe poivrée\nplush girafe => girafe peluche\ncheese =>",
-                        "parameters": {
-                            "max_new_tokens": 64,
-                            "top_p": 0.9,
-                            "temperature": 0.6
-                        }
-                    }
-                }
-            }
-        },
-        "neuron": {
-            "hosting_ecr_specs": {
-                "framework": "djl-neuronx",
-                "framework_version": "0.24.0",
-                "py_version": "py39"
-            },
-            "hosting_script_key": "source-directory-tarballs/meta/inference/textgenerationneuron/v1.0.0/sourcedir.tar.gz",
-            "hosting_use_script_uri": false,
-            "inference_dependencies": [
-                "sagemaker_jumpstart_huggingface_script_utilities==1.0.8",
-                "sagemaker_jumpstart_script_utilities==1.1.8"
-            ],
-            "inference_vulnerable": false,
-            "inference_vulnerabilities": [],
-            "hosting_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/neuron/inference/v1.0.0/",
-            "hosting_prepacked_artifact_version": "1.0.0",
-            "hosting_prepacked_artifact_key": "meta-textgeneration/meta-textgeneration-llama-2-7b-f/artifacts/neuron/inference-prepack/v1.0.0/",
-            "hosting_artifact_s3_data_type": "S3Prefix",
-            "hosting_artifact_compression_type": "None",
-            "hosting_neuron_model_id": "meta-textgeneration-llama-2-7b-f",
-            "hosting_neuron_model_version": "1.0.0",
-            "model_kwargs": {},
-            "deploy_kwargs": {
-                "model_data_download_timeout": 3600,
-                "container_startup_health_check_timeout": 3600
-            },
-            "predictor_specs": {
-                "supported_content_types": [
-                    "application/json"
-                ],
-                "supported_accept_types": [
-                    "application/json"
-                ],
-                "default_content_type": "application/json",
-                "default_accept_type": "application/json"
-            },
-            "default_inference_instance_type": "ml.inf2.xlarge",
-            "supported_inference_instance_types": [
-                "ml.inf2.xlarge",
-                "ml.inf2.8xlarge",
-                "ml.inf2.24xlarge",
-                "ml.inf2.48xlarge"
-            ],
-            "hosting_instance_type_variants": {
-                "regional_aliases": {
-                    "ap-northeast-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-northeast-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
-                    },
-                    "ap-south-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-south-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
-                    },
-                    "ap-southeast-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
-                    },
-                    "ap-southeast-2": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.ap-southeast-2.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
-                    },
-                    "ap-southeast-5": {
-                        "alias_ecr_uri_1": "550225433462.dkr.ecr.ap-southeast-5.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
-                    },
-                    "ap-southeast-7": {
-                        "alias_ecr_uri_1": "590183813437.dkr.ecr.ap-southeast-7.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
-                    },
-                    "ca-west-1": {
-                        "alias_ecr_uri_1": "204538143572.dkr.ecr.ca-west-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
-                    },
-                    "eu-central-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-central-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
-                    },
-                    "eu-central-2": {
-                        "alias_ecr_uri_1": "380420809688.dkr.ecr.eu-central-2.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
-                    },
-                    "eu-west-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-west-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
-                    },
-                    "eu-west-3": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.eu-west-3.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
-                    },
-                    "mx-central-1": {
-                        "alias_ecr_uri_1": "637423239942.dkr.ecr.mx-central-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
-                    },
-                    "sa-east-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.sa-east-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
-                    },
-                    "us-east-1": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
-                    },
-                    "us-east-2": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-east-2.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
-                    },
-                    "us-west-2": {
-                        "alias_ecr_uri_1": "763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.24.0-neuronx-sdk2.14.1"
-                    }
-                },
-                "variants": {
-                    "inf2": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "trn1": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "trn1n": {
-                        "regional_properties": {
-                            "image_uri": "$alias_ecr_uri_1"
-                        }
-                    },
-                    "ml.inf2.xlarge": {
-                        "properties": {
-                            "environment_variables": {
-                                "OPTION_TENSOR_PARALLEL_DEGREE": "2",
-                                "OPTION_N_POSITIONS": "1024",
-                                "OPTION_DTYPE": "fp16",
-                                "OPTION_ROLLING_BATCH": "auto",
-                                "OPTION_MAX_ROLLING_BATCH_SIZE": "1",
-                                "OPTION_NEURON_OPTIMIZE_LEVEL": "2"
-                            },
-                            "resource_requirements": {
-                                "min_memory_mb": 8192,
-                                "num_accelerators": 1
-                            }
-                        }
-                    },
-                    "ml.inf2.8xlarge": {
-                        "properties": {
-                            "environment_variables": {
-                                "OPTION_TENSOR_PARALLEL_DEGREE": "2",
-                                "OPTION_N_POSITIONS": "2048",
-                                "OPTION_DTYPE": "fp16",
-                                "OPTION_ROLLING_BATCH": "auto",
-                                "OPTION_MAX_ROLLING_BATCH_SIZE": "4",
-                                "OPTION_NEURON_OPTIMIZE_LEVEL": "2"
-                            },
-                            "resource_requirements": {
-                                "min_memory_mb": 65536,
-                                "num_accelerators": 1
-                            }
-                        }
-                    },
-                    "ml.inf2.24xlarge": {
-                        "properties": {
-                            "environment_variables": {
-                                "OPTION_TENSOR_PARALLEL_DEGREE": "12",
-                                "OPTION_N_POSITIONS": "4096",
-                                "OPTION_DTYPE": "fp16",
-                                "OPTION_ROLLING_BATCH": "auto",
-                                "OPTION_MAX_ROLLING_BATCH_SIZE": "4",
-                                "OPTION_NEURON_OPTIMIZE_LEVEL": "2"
-                            },
-                            "resource_requirements": {
-                                "min_memory_mb": 196608,
-                                "num_accelerators": 6
-                            }
-                        }
-                    },
-                    "ml.inf2.48xlarge": {
-                        "properties": {
-                            "environment_variables": {
-                                "OPTION_TENSOR_PARALLEL_DEGREE": "24",
-                                "OPTION_N_POSITIONS": "4096",
-                                "OPTION_DTYPE": "fp16",
-                                "OPTION_ROLLING_BATCH": "auto",
-                                "OPTION_MAX_ROLLING_BATCH_SIZE": "4",
-                                "OPTION_NEURON_OPTIMIZE_LEVEL": "2"
-                            },
-                            "resource_requirements": {
-                                "min_memory_mb": 393216,
-                                "num_accelerators": 12
-                            }
-                        }
-                    }
-                }
-            },
-            "inference_volume_size": 256,
-            "inference_enable_network_isolation": false,
-            "hosting_resource_requirements": {
-                "min_memory_mb": 8192,
-                "num_accelerators": 1
-            },
-            "inference_environment_variables": [
-                {
-                    "name": "SAGEMAKER_PROGRAM",
-                    "type": "text",
-                    "default": "inference.py",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "SAGEMAKER_SUBMIT_DIRECTORY",
-                    "type": "text",
-                    "default": "/opt/ml/model/code",
-                    "scope": "container",
-                    "required_for_model_class": false
-                },
-                {
-                    "name": "SAGEMAKER_CONTAINER_LOG_LEVEL",
-                    "type": "text",
-                    "default": "20",
-                    "scope": "container",
-                    "required_for_model_class": false
-                },
-                {
-                    "name": "SAGEMAKER_MODEL_SERVER_TIMEOUT",
-                    "type": "text",
-                    "default": "3600",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "ENDPOINT_SERVER_TIMEOUT",
-                    "type": "int",
-                    "default": 3600,
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "MODEL_CACHE_ROOT",
-                    "type": "text",
-                    "default": "/opt/ml/model",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "SAGEMAKER_ENV",
-                    "type": "text",
-                    "default": "1",
-                    "scope": "container",
-                    "required_for_model_class": true
-                },
-                {
-                    "name": "SAGEMAKER_MODEL_SERVER_WORKERS",
-                    "type": "int",
-                    "default": 1,
-                    "scope": "container",
-                    "required_for_model_class": true
-                }
-            ],
-            "default_payloads": {
-                "mayonnaise": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "generated_text"
-                    },
-                    "body": {
-                        "inputs": "<s>[INST] what is the recipe of mayonnaise? [/INST] ",
-                        "parameters": {
-                            "max_new_tokens": 256,
-                            "top_p": 0.9,
-                            "temperature": 0.6
-                        }
-                    }
-                },
-                "parisTrip": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "generated_text"
-                    },
-                    "body": {
-                        "inputs": "<s>[INST] I am going to Paris, what should I see? [/INST] Paris, the capital of France, is known for its stunning architecture, art museums, historical landmarks, and romantic atmosphere. Here are some of the top attractions to see in Paris:\n\n1. The Eiffel Tower: The iconic Eiffel Tower is one of the most recognizable landmarks in the world and offers breathtaking views of the city.\n2. The Louvre Museum: The Louvre is one of the world's largest and most famous museums, housing an impressive collection of art and artifacts, including the Mona Lisa.\n3. Notre-Dame Cathedral: This beautiful cathedral is one of the most famous landmarks in Paris and is known for its Gothic architecture and stunning stained glass windows.\n\nThese are just a few of the many attractions that Paris has to offer. With so much to see and do, it's no wonder that Paris is one of the most popular tourist destinations in the world.</s><s>[INST] What is so great about #1? [/INST] ",
-                        "parameters": {
-                            "max_new_tokens": 256,
-                            "top_p": 0.9,
-                            "temperature": 0.6
-                        }
-                    }
-                },
-                "parisHaiku": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "generated_text"
-                    },
-                    "body": {
-                        "inputs": "<s>[INST] <<SYS>>\nAlways answer with Haiku\n<</SYS>>\n\nI am going to Paris, what should I see? [/INST] ",
-                        "parameters": {
-                            "max_new_tokens": 256,
-                            "top_p": 0.9,
-                            "temperature": 0.6
-                        }
-                    }
-                },
-                "emojisBeijing": {
-                    "content_type": "application/json",
-                    "prompt_key": "inputs",
-                    "output_keys": {
-                        "generated_text": "generated_text"
-                    },
-                    "body": {
-                        "inputs": "<s>[INST] <<SYS>>\nAlways answer with detailed instruction\n<</SYS>>\n\nHow to go from Beijing to NY? [/INST] ",
-                        "parameters": {
-                            "max_new_tokens": 256,
-                            "top_p": 0.9,
-                            "temperature": 0.6
-                        }
-                    }
-                }
-            }
-        }
-    },
-    "inference_config_rankings": {
-        "overall": {
-            "description": "default",
-            "rankings": [
-                "tgi",
-                "lmi",
-                "lmi-optimized",
-                "neuron"
-            ]
-        }
-    },
-    "hosting_neuron_model_id": "meta-textgenerationneuron-llama-2-7b-f",
-    "hosting_neuron_model_version": "1.0.0"
-}
\ No newline at end of file
diff --git a/test_unified_model_card.py b/test_unified_model_card.py
deleted file mode 100644
index 475d46afdb..0000000000
--- a/test_unified_model_card.py
+++ /dev/null
@@ -1,193 +0,0 @@
-#!/usr/bin/env python3
-
-import json
-import os
-import sys
-import boto3
-import time
-from datetime import datetime
-# from urllib.parse import urlparse
-from unittest.mock import patch
-
-os.environ['HUGGING_FACE_HUB_TOKEN'] = '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'
-os.environ['AWS_DEFAULT_REGION'] = 'us-west-2'
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
-
-from sagemaker.jumpstart.model import JumpStartModel
-from sagemaker.jumpstart.types import JumpStartModelSpecs
-from sagemaker.jumpstart.enums import JumpStartModelType
-
-
-def check_aws_account():
-    """Check which AWS account and region we're using."""
-    try:
-        sts_client = boto3.client('sts')
-        identity = sts_client.get_caller_identity()
-        
-        account_id = identity['Account']
-        user_arn = identity['Arn']
-        region = boto3.Session().region_name or 'us-west-2'
-        
-        print(f" AWS Account: {account_id}")
-        print(f" User/Role: {user_arn}")
-        print(f" Region: {region}")
-        print()
-        
-        return account_id, region
-    except Exception as e:
-        print(f" Error checking AWS account: {e}")
-        return None, None
-
-
-def monitor_endpoint(endpoint_name, region='us-west-2'):
-    """Monitor endpoint deployment progress."""
-    sagemaker_client = boto3.client('sagemaker', region_name=region)
-    
-    print(f" Monitoring endpoint: {endpoint_name}")
-    start_time = time.time()
-    
-    while True:
-        try:
-            response = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
-            status = response['EndpointStatus']
-            elapsed = int(time.time() - start_time)
-            
-            print(f"  [{elapsed//60}m {elapsed%60}s] {endpoint_name}: {status}")
-            
-            if status == 'InService':
-                print(f" {endpoint_name} is ready! (took {elapsed//60}m {elapsed%60}s)")
-                break
-            elif status == 'Failed':
-                print(f" {endpoint_name} deployment failed!")
-                print(f"Failure reason: {response.get('FailureReason', 'Unknown')}")
-                break
-                
-        except Exception as e:
-            print(f"Error checking {endpoint_name}: {e}")
-            
-        time.sleep(30)  # Check every 30 seconds
-
-def load_custom_spec():
-    """Load the custom spec file from src/sagemaker directory."""
-    spec_path = os.path.join(os.path.dirname(__file__), 'specfileex')
-    with open(spec_path, 'r') as f:
-        return json.load(f)
-
-
-# Check AWS account 
-account_id, region = check_aws_account()
-
-custom_spec = load_custom_spec()
-mock_specs = JumpStartModelSpecs(custom_spec)
-
-with patch('sagemaker.jumpstart.cache.JumpStartModelsCache.get_specs') as mock_get_specs, \
-     patch('sagemaker.jumpstart.utils.validate_model_id_and_get_type') as mock_validate_model:
-    
-    mock_get_specs.return_value = mock_specs
-    mock_validate_model.return_value = JumpStartModelType.OPEN_WEIGHTS
-    
-    model_id = "meta-textgeneration-llama-2-7b-f"
-    model_version = "4.19.0"
-    accept_eula = False
-    
-    # Create unique endpoint names with timestamp
-    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
-    neuron_endpoint_name = f"llama-neuron-{timestamp}"
-    gpu_endpoint_name = f"llama-gpu-{timestamp}"
-    
-    print(f" Neuron endpoint: {neuron_endpoint_name}")
-    print(f" GPU endpoint: {gpu_endpoint_name}")
-    print()
-
-    
-    model_neuron = JumpStartModel(
-        model_id=model_id, 
-        model_version=model_version,
-        instance_type="ml.inf2.24xlarge",
-        env={"HUGGING_FACE_HUB_TOKEN": "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"}
-    )
-
-    # Modify to use alpha us-west-2 bucket
-    original_neuron_uri = model_neuron.model_data['S3DataSource']['S3Uri']
-    # Replace with alpha us-west-2 bucket (handle both east-1 and west-2 original buckets)
-    alpha_neuron_uri = original_neuron_uri.replace('jumpstart-private-cache-prod-us-east-1', 'jumpstart-private-cache-alpha-us-west-2')
-    alpha_neuron_uri = alpha_neuron_uri.replace('jumpstart-private-cache-prod-us-west-2', 'jumpstart-private-cache-alpha-us-west-2')
-    # Also handle regular cache buckets (without "private")
-    alpha_neuron_uri = alpha_neuron_uri.replace('jumpstart-cache-prod-us-east-1', 'jumpstart-cache-alpha-us-west-2')
-    alpha_neuron_uri = alpha_neuron_uri.replace('jumpstart-cache-prod-us-west-2', 'jumpstart-cache-alpha-us-west-2')
-    model_neuron.model_data['S3DataSource']['S3Uri'] = alpha_neuron_uri
-    print(f"Original neuron URI: {original_neuron_uri}")
-    print(f"Alpha neuron URI: {alpha_neuron_uri}")
-    print(model_neuron.model_data)
-    neuron_location = model_neuron.model_data['S3DataSource']['S3Uri']
-    print(f"Neuron location: {neuron_location}")
-
-    print("Deploying neuron model...")
-    neuron_predictor = model_neuron.deploy(
-        initial_instance_count=1,
-        instance_type="ml.inf2.24xlarge",
-        endpoint_name=neuron_endpoint_name,
-        accept_eula=True,
-        wait=False 
-    )
-    
-    # Monitor neuron deployment
-    monitor_endpoint(neuron_endpoint_name, 'us-west-2')
-
-
-
-    model_gpu = JumpStartModel(
-        model_id=model_id, 
-        model_version=model_version, 
-        instance_type="ml.g5.12xlarge",
-        env={"HUGGING_FACE_HUB_TOKEN": "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"}
-    )
-
-    # Modify to use alpha us-west-2 bucket  
-    original_gpu_uri = model_gpu.model_data['S3DataSource']['S3Uri']
-    # Replace with alpha us-west-2 bucket (handle both east-1 and west-2 original buckets)
-    alpha_gpu_uri = original_gpu_uri.replace('jumpstart-private-cache-prod-us-east-1', 'jumpstart-private-cache-alpha-us-west-2')
-    alpha_gpu_uri = alpha_gpu_uri.replace('jumpstart-private-cache-prod-us-west-2', 'jumpstart-private-cache-alpha-us-west-2')
-    # Also handle regular cache buckets (without "private")
-    alpha_gpu_uri = alpha_gpu_uri.replace('jumpstart-cache-prod-us-east-1', 'jumpstart-cache-alpha-us-west-2')
-    alpha_gpu_uri = alpha_gpu_uri.replace('jumpstart-cache-prod-us-west-2', 'jumpstart-cache-alpha-us-west-2')
-    model_gpu.model_data['S3DataSource']['S3Uri'] = alpha_gpu_uri
-    print(f"Original GPU URI: {original_gpu_uri}")
-    print(f"Alpha GPU URI: {alpha_gpu_uri}")
-    print(model_gpu.model_data)
-    gpu_location = model_gpu.model_data['S3DataSource']['S3Uri']
-    print(f"GPU location: {gpu_location}")
-
-    print("Deploying GPU model...")
-    gpu_predictor = model_gpu.deploy(
-        initial_instance_count=1,
-        instance_type="ml.g5.12xlarge",
-        endpoint_name=gpu_endpoint_name,
-        accept_eula=True,
-        wait=False  
-    )
-    
-    # Monitor GPU deployment  
-    monitor_endpoint(gpu_endpoint_name, 'us-west-2')
-
-    test_payload = {
-        "inputs": "The meaning of life is",
-        "parameters": {
-            "max_new_tokens": 50,
-            "temperature": 0.7
-        }
-    }
-
-    print("Testing neuron endpoint...")
-    neuron_response = neuron_predictor.predict(test_payload)
-    print(f"Neuron response: {neuron_response}")
-
-    print("Testing GPU endpoint...")
-    gpu_response = gpu_predictor.predict(test_payload)
-    print(f"GPU response: {gpu_response}")
-
-
-    #print("Cleaning up endpoints...")
-    #neuron_predictor.delete_endpoint()
-    #gpu_predictor.delete_endpoint()