Skip to content
Open
3 changes: 0 additions & 3 deletions data.json

This file was deleted.

74 changes: 30 additions & 44 deletions models.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,119 +29,119 @@
"args": ""
},
{
"name": "pyt_vllm_llama-3.1-8b",
"name": "pyt_vllm_llama-2-7b",
"url": "",
"data": "meta-llama/Llama-2-7b-chat-hf",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
"multiple_results": "perf_Llama-3.1-8B-Instruct.csv",
"multiple_results": "perf_Llama-2-7b-chat-hf.csv",
"tags": [
"pyt",
"vllm"
],
"timeout": -1,
"args":
"--model_repo meta-llama/Llama-3.1-8B-Instruct --test_option latency,throughput --num_gpu 1 --datatype float16 --tunableop off"
"--model_repo meta-llama/Llama-2-7b-chat-hf --test_option latency,throughput --num_gpu 1 --datatype float16 --tunableop off"
},
{
"name": "pyt_vllm_llama-3.1-70b",
"name": "pyt_vllm_llama-2-70b",
"url": "",
"data": "meta-llama/Llama-2-70b-chat-hf",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
"multiple_results": "perf_Llama-3.1-70B-Instruct.csv",
"multiple_results": "perf_Llama-2-70b-chat-hf.csv",
"tags": [
"pyt",
"vllm"
],
"timeout": -1,
"args":
"--model_repo meta-llama/Llama-3.1-70B-Instruct --test_option latency,throughput --num_gpu 8 --datatype float16 --tunableop off"
"--model_repo meta-llama/Llama-2-70b-chat-hf --test_option latency,throughput --num_gpu 8 --datatype float16 --tunableop off"
},
{
"name": "pyt_vllm_llama-3.1-405b",
"name": "pyt_vllm_llama-3.1-8b",
"url": "",
"data": "meta-llama/Llama-3.1-8B-Instruct",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
"multiple_results": "perf_Llama-3.1-405B-Instruct.csv",
"multiple_results": "perf_Llama-3.1-8B-Instruct.csv",
"tags": [
"pyt",
"vllm"
],
"timeout": -1,
"args":
"--model_repo meta-llama/Llama-3.1-405B-Instruct --test_option latency,throughput --num_gpu 8 --datatype float16 --tunableop off"
"--model_repo meta-llama/Llama-3.1-8B-Instruct --test_option latency,throughput --num_gpu 1 --datatype float16 --tunableop off"
},
{
"name": "pyt_vllm_llama-3.2-11b-vision-instruct",
"name": "pyt_vllm_llama-3.1-70b",
"url": "",
"data": "meta-llama/Llama-3.1-70B-Instruct",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
"multiple_results": "perf_Llama-3.2-11B-Vision-Instruct.csv",
"multiple_results": "perf_Llama-3.1-70B-Instruct.csv",
"tags": [
"pyt",
"vllm"
],
"timeout": -1,
"args":
"--model_repo meta-llama/Llama-3.2-11B-Vision-Instruct --test_option latency,throughput --num_gpu 1 --datatype float16 --tunableop off"
"--model_repo meta-llama/Llama-3.1-70B-Instruct --test_option latency,throughput --num_gpu 8 --datatype float16 --tunableop off"
},
{
"name": "pyt_vllm_llama-2-7b",
"name": "pyt_vllm_llama-3.1-405b",
"url": "",
"data": "meta-llama/Llama-3.1-405B-Instruct",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
"multiple_results": "perf_Llama-2-7b-chat-hf.csv",
"multiple_results": "perf_Llama-3.1-405B-Instruct.csv",
"tags": [
"pyt",
"vllm"
],
"timeout": -1,
"args":
"--model_repo meta-llama/Llama-2-7b-chat-hf --test_option latency,throughput --num_gpu 1 --datatype float16 --tunableop off"
"--model_repo meta-llama/Llama-3.1-405B-Instruct --test_option latency,throughput --num_gpu 8 --datatype float16 --tunableop off"
},
{
"name": "pyt_vllm_llama-2-70b",
"name": "pyt_vllm_llama-3.2-11b-vision-instruct",
"url": "",
"data": "meta-llama/Llama-3.2-11B-Vision-Instruct",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
"multiple_results": "perf_Llama-2-70b-chat-hf.csv",
"multiple_results": "perf_Llama-3.2-11B-Vision-Instruct.csv",
"tags": [
"pyt",
"vllm"
],
"timeout": -1,
"args":
"--model_repo meta-llama/Llama-2-70b-chat-hf --test_option latency,throughput --num_gpu 8 --datatype float16 --tunableop off"
"--model_repo meta-llama/Llama-3.2-11B-Vision-Instruct --test_option latency,throughput --num_gpu 1 --datatype float16 --tunableop off"
},
{
"name": "pyt_vllm_mixtral-8x7b",
"url": "",
"data": "mistralai/Mixtral-8x7B-Instruct-v0.1",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -157,9 +157,9 @@
{
"name": "pyt_vllm_mixtral-8x22b",
"url": "",
"data": "mistralai/Mixtral-8x22B-Instruct-v0.1",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -175,9 +175,9 @@
{
"name": "pyt_vllm_mistral-7b",
"url": "",
"data": "mistralai/Mistral-7B-Instruct-v0.1",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -195,7 +195,6 @@
"url": "",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -213,7 +212,6 @@
"url": "",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -231,7 +229,6 @@
"url": "",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -249,7 +246,6 @@
"url": "",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -267,7 +263,6 @@
"url": "",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -285,7 +280,6 @@
"url": "",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -301,9 +295,9 @@
{
"name": "pyt_vllm_llama-3.1-8b_fp8",
"url": "",
"data": "amd/Llama-3.1-8B-Instruct-FP8-KV",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -319,9 +313,9 @@
{
"name": "pyt_vllm_llama-3.1-70b_fp8",
"url": "",
"data": "amd/Llama-3.1-70B-Instruct-FP8-KV",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -337,9 +331,9 @@
{
"name": "pyt_vllm_llama-3.1-405b_fp8",
"url": "",
"data": "amd/Llama-3.1-405B-Instruct-FP8-KV",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -357,7 +351,6 @@
"url": "",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -375,7 +368,6 @@
"url": "",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -393,7 +385,6 @@
"url": "",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -411,7 +402,6 @@
"url": "",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -429,7 +419,6 @@
"url": "",
"dockerfile": "docker/pyt_vllm",
"scripts": "scripts/vllm/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -447,7 +436,6 @@
"url": "",
"dockerfile": "docker/pytorch_train",
"scripts": "scripts/pytorch_train/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -464,7 +452,6 @@
"url": "",
"dockerfile": "docker/pytorch_train",
"scripts": "scripts/pytorch_train/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand All @@ -481,7 +468,6 @@
"url": "",
"dockerfile": "docker/pytorch_train",
"scripts": "scripts/pytorch_train/run.sh",
"data": "huggingface",
"n_gpus": "-1",
"owner": "mad.support@amd.com",
"training_precision": "",
Expand Down
6 changes: 6 additions & 0 deletions scripts/vllm/vllm_benchmark_report.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ model_org_name=(${model//// })
# Second component of the split repo id (e.g. "Llama-2-7b-chat-hf" from
# "meta-llama/Llama-2-7b-chat-hf"); the split into model_org_name happens
# just above this chunk — TODO confirm against full script.
model_name=${model_org_name[1]}
# Tensor-parallel degree; mirrors the requested GPU count ($numgpu).
tp=$numgpu

# Prefer a locally cached copy of the model data: when the MAD_DATAHOME
# environment variable is set to a non-empty value, point "model" at it
# instead of the remote Hugging Face repo id.
if [[ -n "$MAD_DATAHOME" ]]; then
  echo "Using data from $MAD_DATAHOME"
  model=$MAD_DATAHOME
fi

# perf configuration
# Disable the Triton flash-attention backend in vLLM — presumably to force
# the alternative (CK/ROCm) attention path on this hardware; TODO confirm.
export VLLM_USE_TRITON_FLASH_ATTN=0
# RCCL/NCCL channel-count floor; 112 is a hardware-specific tuning value
# for multi-GPU collectives — NOTE(review): verify against target platform.
export NCCL_MIN_NCHANNELS=112
Expand Down