diff --git a/CMakeLists.txt b/CMakeLists.txt
index ec7bd6c51453..87b1d43748ee 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,6 +33,7 @@ tvm_option(USE_OPENCL "Build with OpenCL" OFF)
 tvm_option(USE_OPENCL_ENABLE_HOST_PTR "Enable OpenCL memory object access to host" OFF)
 tvm_option(USE_OPENCL_GTEST "Path to OpenCL specific gtest version for runtime cpp tests." /path/to/opencl/gtest)
 tvm_option(USE_VULKAN "Build with Vulkan" OFF)
+tvm_option(USE_VULKAN_GTEST "Path to Vulkan specific gtest version for runtime cpp tests." /path/to/vulkan/gtest)
 # Whether to use spirv-tools and SPIRV-Headers from Khronos github or gitlab.
@@ -454,6 +455,9 @@ set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CUDA_STANDARD_REQUIRED ON)
 set(CMAKE_CUDA_STANDARD 17)
 
+# Include centralized gtest setup
+include(cmake/modules/GTestConfig.cmake)
+
 # Module rules
 include(cmake/modules/CUDA.cmake)
 include(cmake/modules/Hexagon.cmake)  # This must come before logging.cmake
diff --git a/cmake/modules/GTestConfig.cmake b/cmake/modules/GTestConfig.cmake
new file mode 100644
index 000000000000..beb0aa12a9b2
--- /dev/null
+++ b/cmake/modules/GTestConfig.cmake
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(Build_GTests OFF)
+if(NOT TARGET gtest)
+  unset(runtime_gtests)
+  if(DEFINED USE_OPENCL_GTEST AND EXISTS ${USE_OPENCL_GTEST})
+    set(runtime_gtests ${USE_OPENCL_GTEST})
+  elseif(DEFINED USE_VULKAN_GTEST AND EXISTS ${USE_VULKAN_GTEST})
+    set(runtime_gtests ${USE_VULKAN_GTEST})
+  elseif(ANDROID_ABI AND DEFINED ENV{ANDROID_NDK_HOME})
+    set(GOOGLETEST_ROOT $ENV{ANDROID_NDK_HOME}/sources/third_party/googletest)
+    add_library(gtest_main STATIC
+      ${GOOGLETEST_ROOT}/src/gtest_main.cc
+      ${GOOGLETEST_ROOT}/src/gtest-all.cc)
+    target_include_directories(gtest_main PRIVATE ${GOOGLETEST_ROOT})
+    target_include_directories(gtest_main PUBLIC ${GOOGLETEST_ROOT}/include)
+    set(Build_GTests ON)
+    message(STATUS "Using gtest from Android NDK")
+    return()
+  else()
+    message(STATUS "No valid GTest path found, skipping GTest configuration")
+    return()
+  endif()
+
+  # Configure gtest from the selected path
+  if(runtime_gtests AND EXISTS ${runtime_gtests})
+    include(FetchContent)
+    FetchContent_Declare(googletest SOURCE_DIR "${runtime_gtests}")
+    set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+    FetchContent_MakeAvailable(googletest)
+    install(TARGETS gtest EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX})
+    set(Build_GTests ON)
+  else()
+    set(Build_GTests OFF)
+    return()
+  endif()
+endif()
diff --git a/cmake/modules/LibInfo.cmake b/cmake/modules/LibInfo.cmake
index f286d9f7d9fa..0a83a59fbf05 100644
--- a/cmake/modules/LibInfo.cmake
+++ b/cmake/modules/LibInfo.cmake
@@ -120,6 +120,7 @@ function(add_lib_info src_file)
     TVM_INFO_USE_THRUST="${USE_THRUST}"
     TVM_INFO_USE_CURAND="${USE_CURAND}"
     TVM_INFO_USE_VULKAN="${USE_VULKAN}"
+    TVM_INFO_USE_VULKAN_GTEST="${USE_VULKAN_GTEST}"
    TVM_INFO_USE_CLML="${USE_CLML}"
    TVM_INFO_USE_CLML_GRAPH_EXECUTOR="${USE_CLML_GRAPH_EXECUTOR}"
    TVM_INFO_USE_TVM_CLML_VERSION="${CLML_VERSION_MAJOR}"
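Usage note: backend modules do not include `GTestConfig.cmake` themselves; it is included once from the top-level CMakeLists.txt, and backends only branch on the `Build_GTests` flag it sets. A minimal sketch of the consuming pattern (the `my-backend-cpptest` target and test path are hypothetical; the OpenCL.cmake and Vulkan.cmake changes below follow exactly this shape):

```cmake
# Hypothetical backend module consuming the centralized gtest setup.
# Build_GTests is set once by cmake/modules/GTestConfig.cmake.
if(Build_GTests)
  tvm_file_glob(GLOB_RECURSE MY_BACKEND_TEST_SRCS "tests/cpp-runtime/my-backend/*.cc")
  add_executable(my-backend-cpptest ${MY_BACKEND_TEST_SRCS})
  target_link_libraries(my-backend-cpptest PRIVATE gtest_main tvm_runtime)
endif()
```

A typical configure line would then be `cmake .. -DUSE_VULKAN=ON -DUSE_VULKAN_GTEST=/path/to/googletest`.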
diff --git a/cmake/modules/OpenCL.cmake b/cmake/modules/OpenCL.cmake
index c5c8eae721fa..32520f044e89 100644
--- a/cmake/modules/OpenCL.cmake
+++ b/cmake/modules/OpenCL.cmake
@@ -14,7 +14,6 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
 if(USE_OPENCL)
   tvm_file_glob(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc)
   list(APPEND COMPILER_SRCS src/target/spirv/spirv_utils.cc)
@@ -35,36 +34,15 @@ if(USE_OPENCL)
     list(APPEND TVM_RUNTIME_LINKER_LIBS ${OpenCL_LIBRARIES})
   endif()
 
-  if(DEFINED USE_OPENCL_GTEST)
-    if(EXISTS ${USE_OPENCL_GTEST})
-      include(FetchContent)
-      FetchContent_Declare(googletest SOURCE_DIR "${USE_OPENCL_GTEST}")
-      set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
-      FetchContent_MakeAvailable(googletest)
-      install(TARGETS gtest EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX})
-
-      message(STATUS "Found OpenCL gtest at ${USE_OPENCL_GTEST}")
-      set(Build_OpenCL_GTests ON)
-    elseif (ANDROID_ABI AND DEFINED ENV{ANDROID_NDK_HOME})
-      set(GOOGLETEST_ROOT $ENV{ANDROID_NDK_HOME}/sources/third_party/googletest)
-      add_library(gtest_main STATIC ${GOOGLETEST_ROOT}/src/gtest_main.cc ${GOOGLETEST_ROOT}/src/gtest-all.cc)
-      target_include_directories(gtest_main PRIVATE ${GOOGLETEST_ROOT})
-      target_include_directories(gtest_main PUBLIC ${GOOGLETEST_ROOT}/include)
-      message(STATUS "Using gtest from Android NDK")
-      set(Build_OpenCL_GTests ON)
-    endif()
-
-    if(Build_OpenCL_GTests)
-      message(STATUS "Building OpenCL-Gtests")
-      tvm_file_glob(GLOB_RECURSE OPENCL_TEST_SRCS
-        "tests/cpp-runtime/opencl/*.cc"
-      )
-      add_executable(opencl-cpptest ${OPENCL_TEST_SRCS})
-      target_link_libraries(opencl-cpptest PRIVATE gtest_main tvm_runtime ${OpenCL_LIBRARIES})
-    else()
-      message(STATUS "Couldn't build OpenCL-Gtests")
-    endif()
+  if(Build_GTests)
+    message(STATUS "Building OpenCL GTests")
+    tvm_file_glob(GLOB_RECURSE OPENCL_TEST_SRCS "tests/cpp-runtime/opencl/*.cc")
+    add_executable(opencl-cpptest ${OPENCL_TEST_SRCS})
+    target_link_libraries(opencl-cpptest PRIVATE gtest_main tvm_runtime ${OpenCL_LIBRARIES})
+  else()
+    message(STATUS "Skipping OpenCL GTests: gtest is not configured")
   endif()
+
   list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS})
   if(USE_OPENCL_ENABLE_HOST_PTR)
     add_definitions(-DOPENCL_ENABLE_HOST_PTR)
diff --git a/cmake/modules/Vulkan.cmake b/cmake/modules/Vulkan.cmake
index 1f303f3a032b..35994c6dc92b 100644
--- a/cmake/modules/Vulkan.cmake
+++ b/cmake/modules/Vulkan.cmake
@@ -30,6 +30,16 @@ if(USE_VULKAN)
   message(STATUS "Build with Vulkan support")
   tvm_file_glob(GLOB RUNTIME_VULKAN_SRCS src/runtime/vulkan/*.cc)
   tvm_file_glob(GLOB COMPILER_VULKAN_SRCS src/target/spirv/*.cc)
+
+  if(Build_GTests)
+    message(STATUS "Building Vulkan GTests")
+    tvm_file_glob(GLOB_RECURSE VULKAN_TEST_SRCS "tests/cpp-runtime/vulkan/*.cc")
+    add_executable(vulkan-cpptest ${VULKAN_TEST_SRCS})
+    target_link_libraries(vulkan-cpptest PRIVATE gtest_main tvm_runtime)
+  else()
+    message(STATUS "Skipping Vulkan GTests: gtest is not configured")
+  endif()
+
   list(APPEND RUNTIME_SRCS ${RUNTIME_VULKAN_SRCS})
   list(APPEND COMPILER_SRCS ${COMPILER_VULKAN_SRCS})
   list(APPEND TVM_LINKER_LIBS ${Vulkan_SPIRV_TOOLS_LIBRARY})
diff --git a/cmake/utils/FindVulkan.cmake b/cmake/utils/FindVulkan.cmake
index 032ff1dffa21..b1b9693a2f6c 100644
--- a/cmake/utils/FindVulkan.cmake
+++ b/cmake/utils/FindVulkan.cmake
@@ -36,7 +36,7 @@ macro(find_vulkan use_vulkan use_khronos_spirv)
   set(__use_vulkan ${use_vulkan})
   if(IS_DIRECTORY ${__use_vulkan})
     set(__vulkan_sdk ${__use_vulkan})
-    message(STATUS "Custom Vulkan SDK PATH=" ${__use_vulkan})
+    message(STATUS "Using custom Vulkan SDK: ${__vulkan_sdk}")
   elseif(IS_DIRECTORY $ENV{VULKAN_SDK})
     set(__vulkan_sdk $ENV{VULKAN_SDK})
   else()
@@ -46,19 +46,41 @@ macro(find_vulkan use_vulkan use_khronos_spirv)
   if(IS_DIRECTORY ${use_khronos_spirv})
     set(__use_khronos_spirv
 ${use_khronos_spirv})
-    message(STATUS "Custom khronos spirv PATH=" ${__use_khronos_spirv})
+    message(STATUS "Using custom Khronos SPIRV path: ${__use_khronos_spirv}")
   else()
     set(__use_khronos_spirv "")
   endif()
 
   if(CMAKE_SYSTEM_NAME STREQUAL "Android")
-    set(VULKAN_NDK_SRC ${CMAKE_ANDROID_NDK}/sources/third_party/vulkan/src)
-    set(Vulkan_INCLUDE_DIRS ${VULKAN_NDK_SRC}/include)
-    set(Vulkan_FOUND TRUE)
-    message(STATUS "Android Vulkan_INCLUDE_DIRS=" ${Vulkan_INCLUDE_DIRS})
-    message(STATUS "Skip finding SPIRV in Android, make sure you only build tvm runtime.")
-    return()
-  endif()
+    message(STATUS "Detected Android build")
+
+    set(Vulkan_INCLUDE_DIRS "${CMAKE_SYSROOT}/usr/include/vulkan")
+
+    # Map Android ABI to architecture
+    set(ANDROID_LIB_ARCH "")
+    if(CMAKE_ANDROID_ARCH_ABI STREQUAL "arm64-v8a")
+      set(ANDROID_LIB_ARCH "aarch64-linux-android")
+    elseif(CMAKE_ANDROID_ARCH_ABI STREQUAL "armeabi-v7a")
+      set(ANDROID_LIB_ARCH "arm-linux-androideabi")
+    elseif(CMAKE_ANDROID_ARCH_ABI STREQUAL "x86")
+      set(ANDROID_LIB_ARCH "i686-linux-android")
+    elseif(CMAKE_ANDROID_ARCH_ABI STREQUAL "x86_64")
+      set(ANDROID_LIB_ARCH "x86_64-linux-android")
+    else()
+      message(FATAL_ERROR "Unsupported Android ABI: ${CMAKE_ANDROID_ARCH_ABI}")
+    endif()
+
+    # Find the Vulkan library for the targeted Android API level
+    set(Vulkan_LIB_PATH "${CMAKE_SYSROOT}/usr/lib/${ANDROID_LIB_ARCH}/${CMAKE_SYSTEM_VERSION}")
+    find_library(Vulkan_LIBRARY NAMES vulkan libvulkan.so PATHS ${Vulkan_LIB_PATH} NO_DEFAULT_PATH)
+
+    if(Vulkan_LIBRARY)
+      set(Vulkan_FOUND TRUE)
+    else()
+      message(FATAL_ERROR "Could not find Vulkan lib in ${Vulkan_LIB_PATH}")
+    endif()
+
+  else()
 
   if(__vulkan_sdk)
     set(Vulkan_INCLUDE_DIRS ${__vulkan_sdk}/include)
@@ -66,25 +88,22 @@ macro(find_vulkan use_vulkan use_khronos_spirv)
     if(Vulkan_LIBRARY)
       set(Vulkan_FOUND TRUE)
     endif()
-  endif(__vulkan_sdk)
+  endif()
 
-  # resort to find vulkan of option is on
-  if(NOT Vulkan_FOUND)
-    if(${__use_vulkan} MATCHES ${IS_TRUE_PATTERN})
-      find_package(Vulkan QUIET)
-    endif()
+  if(NOT Vulkan_FOUND AND ${use_vulkan} MATCHES ${IS_TRUE_PATTERN})
+    find_package(Vulkan QUIET)
   endif()
 
   if(Vulkan_FOUND)
     get_filename_component(VULKAN_LIBRARY_PATH ${Vulkan_LIBRARY} DIRECTORY)
     if (WIN32)
       find_library(Vulkan_SPIRV_TOOLS_LIBRARY SPIRV-Tools
-        HINTS ${__use_khronos_spirv}/spirv-tools/lib ${VULKAN_LIBRARY_PATH} ${VULKAN_LIBRARY_PATH}/spirv-tools ${VULKAN_SDK}/lib)
+        HINTS ${__use_khronos_spirv}/spirv-tools/lib ${VULKAN_LIBRARY_PATH} ${VULKAN_LIBRARY_PATH}/spirv-tools ${__vulkan_sdk}/lib)
       find_path(_libspirv libspirv.h HINTS ${__use_khronos_spirv}/spirv-tools/include ${Vulkan_INCLUDE_DIRS} PATH_SUFFIXES vulkan spirv-tools)
       find_path(_spirv spirv.hpp HINTS ${__use_khronos_spirv}/SPIRV-Headers/include ${Vulkan_INCLUDE_DIRS} PATH_SUFFIXES vulkan SPIRV spirv/unified1 spirv-headers)
     else()
       find_library(Vulkan_SPIRV_TOOLS_LIBRARY SPIRV-Tools
-        HINTS ${__use_khronos_spirv}/lib ${VULKAN_LIBRARY_PATH} ${VULKAN_LIBRARY_PATH}/spirv-tools ${VULKAN_SDK}/lib)
+        HINTS ${__use_khronos_spirv}/lib ${VULKAN_LIBRARY_PATH} ${VULKAN_LIBRARY_PATH}/spirv-tools ${__vulkan_sdk}/lib)
       find_path(_libspirv libspirv.h HINTS ${__use_khronos_spirv}/include ${Vulkan_INCLUDE_DIRS} PATH_SUFFIXES vulkan spirv-tools)
       find_path(_spirv spirv.hpp HINTS ${__use_khronos_spirv}/include ${Vulkan_INCLUDE_DIRS} PATH_SUFFIXES vulkan SPIRV spirv/unified1 spirv-headers)
     endif()
@@ -95,4 +114,5 @@ macro(find_vulkan use_vulkan use_khronos_spirv)
     message(STATUS "Vulkan_LIBRARY=" ${Vulkan_LIBRARY})
     message(STATUS "Vulkan_SPIRV_TOOLS_LIBRARY=" ${Vulkan_SPIRV_TOOLS_LIBRARY})
   endif(Vulkan_FOUND)
+  endif()
 endmacro(find_vulkan)
diff --git a/python/tvm/relax/pipeline.py b/python/tvm/relax/pipeline.py
index 1c25b2053bc2..79fde6c362f4 100644
--- a/python/tvm/relax/pipeline.py
+++ b/python/tvm/relax/pipeline.py
@@ -337,7 +337,7 @@ def get_default_pipeline(target: tvm.target.Target):
         return backend.gpu_generic.get_default_pipeline(target)
     if target.kind.name == "llvm":
         return backend.cpu_generic.get_default_pipeline(target)
-    if target.kind.name == "opencl" and "adreno" in target.keys:
+    if target.kind.name in ["opencl", "vulkan"] and "adreno" in target.keys:
         return backend.adreno.get_default_pipeline(target)
     if BackendDispatcher.is_gpu_target(target):
         return backend.gpu_generic.get_default_pipeline(target)
diff --git a/python/tvm/testing/utils.py b/python/tvm/testing/utils.py
index 51fad1803ad9..4167759a8caf 100644
--- a/python/tvm/testing/utils.py
+++ b/python/tvm/testing/utils.py
@@ -838,6 +838,14 @@ def _multi_gpu_exists():
     )
 
 
+def _check_opencl_vulkan():
+    return (
+        (_cmake_flag_enabled("USE_OPENCL") and tvm.opencl(0).exist)
+        or (_cmake_flag_enabled("USE_VULKAN") and tvm.vulkan(0).exist)
+        or "RPC_TARGET" in os.environ
+    )
+
+
 # Mark a test as requiring llvm to run
 requires_llvm = Feature(
     "llvm", "LLVM", cmake_flag="USE_LLVM", target_kind_enabled="llvm", target_kind_hardware="llvm"
@@ -976,8 +984,8 @@ def _multi_gpu_exists():
     "Vulkan",
     cmake_flag="USE_VULKAN",
     target_kind_enabled="vulkan",
-    target_kind_hardware="vulkan",
-    parent_features="gpu",
+    target_kind_hardware="vulkan" if "RPC_TARGET" not in os.environ else None,
+    parent_features="gpu" if "RPC_TARGET" not in os.environ else None,
 )
 
 # Mark a test as requiring OpenCLML support in build.
@@ -988,6 +996,13 @@ def _multi_gpu_exists():
     target_kind_enabled="opencl",
 )
 
+requires_opencl_vulkan = Feature(
+    "opencl_vulkan",
+    "OpenCL or Vulkan",
+    run_time_check=_check_opencl_vulkan,
+    parent_features=["opencl", "gpu"],
+)
+
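For illustration, a hedged sketch of how the new marker is intended to be used in a test file (the test body is illustrative, not taken from this change; the decorator passes when either backend is built with a live device, or when `RPC_TARGET` is set for remote runs):

```python
import numpy as np
import tvm
import tvm.testing


@tvm.testing.requires_opencl_vulkan
def test_roundtrip_on_opencl_or_vulkan():
    # Pick whichever device this build provides.
    dev = tvm.vulkan(0) if tvm.vulkan(0).exist else tvm.opencl(0)
    x = np.arange(16, dtype="float32")
    a = tvm.nd.array(x, device=dev)  # host -> device copy
    np.testing.assert_allclose(a.numpy(), x)  # device -> host copy
```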
 # Mark a test as requiring NNAPI support in build.
 requires_nnapi = Feature(
     "NNAPI",
diff --git a/python/tvm/tir/pipeline.py b/python/tvm/tir/pipeline.py
index 1ee4a5b1d315..8dd563752a95 100644
--- a/python/tvm/tir/pipeline.py
+++ b/python/tvm/tir/pipeline.py
@@ -178,7 +178,7 @@ def get_default_tir_pipeline(
     target: tvm.target.Target,  # pylint: disable=unused-argument
 ) -> tvm.transform.Pass:
     """Get the default TIR pipeline for the given target."""
-    if target.kind.name == "opencl" and "adreno" in target.keys:
+    if target.kind.name in ["opencl", "vulkan"] and "adreno" in target.keys:
         return backend.adreno.get_tir_pipeline(target)
     else:
         return default_tir_pipeline()
diff --git a/src/runtime/file_utils.cc b/src/runtime/file_utils.cc
index b3733ee6fdff..d64c95df83be 100644
--- a/src/runtime/file_utils.cc
+++ b/src/runtime/file_utils.cc
@@ -45,6 +45,7 @@ void FunctionInfo::Save(dmlc::JSONWriter* writer) const {
   writer->BeginObject();
   writer->WriteObjectKeyValue("name", name);
   writer->WriteObjectKeyValue("arg_types", sarg_types);
+  writer->WriteObjectKeyValue("storage_scopes", storage_scopes);
   writer->WriteObjectKeyValue("launch_param_tags", launch_param_tags);
   std::vector<int> iarg_extra_tags(arg_extra_tags.size());
   for (size_t i = 0; i < arg_extra_tags.size(); ++i) {
@@ -59,6 +60,7 @@ void FunctionInfo::Load(dmlc::JSONReader* reader) {
   std::vector<std::string> sarg_types;
   helper.DeclareField("name", &name);
   helper.DeclareField("arg_types", &sarg_types);
+  helper.DeclareOptionalField("storage_scopes", &storage_scopes);
   helper.DeclareOptionalField("launch_param_tags", &launch_param_tags);
   helper.DeclareOptionalField("thread_axis_tags", &launch_param_tags);  // for backward compatibility
@@ -78,6 +80,7 @@ void FunctionInfo::Load(dmlc::JSONReader* reader) {
 void FunctionInfo::Save(dmlc::Stream* writer) const {
   writer->Write(name);
   writer->Write(arg_types);
+  writer->Write(storage_scopes);
   writer->Write(launch_param_tags);
   writer->Write(arg_extra_tags);
 }
@@ -85,6 +88,7 @@ void FunctionInfo::Save(dmlc::Stream* writer) const {
 bool FunctionInfo::Load(dmlc::Stream* reader) {
   if (!reader->Read(&name)) return false;
   if (!reader->Read(&arg_types)) return false;
+  if (!reader->Read(&storage_scopes)) return false;
   if (!reader->Read(&launch_param_tags)) return false;
   if (!reader->Read(&arg_extra_tags)) return false;
   return true;
diff --git a/src/runtime/meta_data.h b/src/runtime/meta_data.h
index aceb97b58374..61e4fde31a6a 100644
--- a/src/runtime/meta_data.h
+++ b/src/runtime/meta_data.h
@@ -59,6 +59,7 @@ constexpr const char* kUseCooperativeLaunch = "tir.use_cooperative_launch";
 struct FunctionInfo {
   std::string name;
   std::vector<DLDataType> arg_types;
+  std::vector<std::string> storage_scopes;
   std::vector<std::string> launch_param_tags;
 
   enum class ArgExtraTags : int { kNone = 0, kTensorMap = 1 };
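Why `DeclareOptionalField` matters here: JSON metadata written before this change has no `storage_scopes` key, and optional fields let such blobs keep loading, whereas the binary `dmlc::Stream` path is positional and is not backward compatible once the extra `Write`/`Read` pair is inserted. A standalone sketch of the dmlc-core pattern, using a simplified stand-in struct rather than the real `FunctionInfo`:

```cpp
#include <dmlc/json.h>

#include <sstream>
#include <string>
#include <vector>

// Simplified stand-in for FunctionInfo, to show optional-field loading.
struct ToyFunctionInfo {
  std::string name;
  std::vector<std::string> storage_scopes;

  void Load(dmlc::JSONReader* reader) {
    dmlc::JSONObjectReadHelper helper;
    helper.DeclareField("name", &name);
    // Optional: JSON written before the field existed simply lacks the key.
    helper.DeclareOptionalField("storage_scopes", &storage_scopes);
    helper.ReadAllFields(reader);
  }
};

int main() {
  // A pre-change record without "storage_scopes" still loads cleanly.
  std::istringstream is("{\"name\": \"main\"}");
  dmlc::JSONReader reader(&is);
  ToyFunctionInfo info;
  info.Load(&reader);
  return info.storage_scopes.empty() ? 0 : 1;
}
```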
diff --git a/src/runtime/vulkan/vulkan_buffer.cc b/src/runtime/vulkan/vulkan_buffer.cc
index f8d40b030919..646e03e441ad 100644
--- a/src/runtime/vulkan/vulkan_buffer.cc
+++ b/src/runtime/vulkan/vulkan_buffer.cc
@@ -22,6 +22,7 @@
 #include <utility>
 
 #include "vulkan_device_api.h"
+#include "vulkan_resource.h"
 
 namespace tvm {
 namespace runtime {
@@ -29,6 +30,7 @@ namespace vulkan {
 
 VkBufferCreateInfo MakeBufferCreateInfo(size_t nbytes, VkBufferUsageFlags usage) {
   VkBufferCreateInfo info = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
+  info.size = nbytes;
   // Since sharingMode is not VK_SHARING_MODE_CONCURRENT, no need to
   // specify the queue families.
@@ -38,46 +40,48 @@ VkBufferCreateInfo MakeBufferCreateInfo(size_t nbytes, VkBufferUsageFlags usage)
 }
 
 VulkanBuffer::VulkanBuffer(const VulkanDevice& device, size_t nbytes, VkBufferUsageFlags usage,
-                           uint32_t mem_type_index)
-    : device_(device) {
-  // Create a buffer
+                           uint32_t mem_type_index, std::optional<std::string> mem_scope,
+                           std::shared_ptr back_memory)
+    : VulkanResource(device, mem_scope, back_memory), size(nbytes) {
   VkBufferCreateInfo buffer_info = MakeBufferCreateInfo(nbytes, usage);
   VULKAN_CALL(vkCreateBuffer(device, &buffer_info, nullptr, &buffer));
 
-  // Allocate memory
-  VkMemoryAllocateInfo mem_info = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
-  mem_info.allocationSize = buffer_info.size;
-  mem_info.memoryTypeIndex = mem_type_index;
-
-  VkMemoryDedicatedAllocateInfoKHR dedicated_info = {
-      VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR};
+  VkMemoryRequirements mem_reqs;
+  vkGetBufferMemoryRequirements(device, buffer, &mem_reqs);
 
-  bool use_dedicated_allocation = UseDedicatedAllocation(device, buffer, &mem_info.allocationSize);
-  if (use_dedicated_allocation) {
-    dedicated_info.buffer = buffer;
-    mem_info.pNext = &dedicated_info;
+  // Allocate new memory if none was passed in
+  if (!memory) {
+    AllocateMemory(mem_reqs, mem_type_index);
   }
-  VULKAN_CALL(vkAllocateMemory(device, &mem_info, nullptr, &memory));
 
-  // Bind the buffer to the allocated memory
-  VULKAN_CALL(vkBindBufferMemory(device, buffer, memory, 0));
+  VULKAN_CALL(vkBindBufferMemory(device, buffer, memory->memory_, 0));
+}
+
+void VulkanBuffer::AllocateMemory(const VkMemoryRequirements& mem_reqs, uint32_t mem_type_index) {
+  VkMemoryAllocateInfo mem_info = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
+  mem_info.allocationSize = mem_reqs.size;
+  mem_info.memoryTypeIndex = mem_type_index;
+
+  // Allocate memory
+  VkDeviceMemory raw_memory;
+  VULKAN_CALL(vkAllocateMemory(device_, &mem_info, nullptr, &raw_memory));
+
+  // Store the allocated memory along with its requirements
+  memory = std::make_shared(raw_memory, mem_reqs);
 }
 
 VulkanBuffer::~VulkanBuffer() {
   if (buffer) {
     vkDestroyBuffer(device_, buffer, nullptr);
-  }
-  if (memory) {
-    vkFreeMemory(device_, memory, nullptr);
+    buffer = VK_NULL_HANDLE;
   }
 }
 
 VulkanBuffer::VulkanBuffer(VulkanBuffer&& other)
-    : device_(other.device_), buffer(other.buffer), memory(other.memory) {
-  other.device_ = VK_NULL_HANDLE;
+    : VulkanResource(std::move(other)), buffer(other.buffer), size(other.size) {
   other.buffer = VK_NULL_HANDLE;
-  other.memory = VK_NULL_HANDLE;
+  other.size = 0;
 }
 
 VulkanBuffer& VulkanBuffer::operator=(VulkanBuffer&& other) {
@@ -115,14 +119,15 @@ bool VulkanBuffer::UseDedicatedAllocation(const VulkanDevice& device, VkBuffer b
 }
 
 VulkanHostVisibleBuffer::VulkanHostVisibleBuffer(const VulkanDevice& device, size_t nbytes,
-                                                 VkBufferUsageFlags usage, uint32_t mem_type_index)
-    : vk_buf(device, nbytes, usage, mem_type_index), size(nbytes) {
-  VULKAN_CALL(vkMapMemory(device, vk_buf.memory, 0, size, 0, &host_addr));
+                                                 VkBufferUsageFlags usage, uint32_t mem_type_index,
+                                                 std::optional<std::string> mem_scope)
+    : vk_buf(device, nbytes, usage, mem_type_index, mem_scope), size(nbytes) {
+  VULKAN_CALL(vkMapMemory(device, vk_buf.memory->memory_, 0, size, 0, &host_addr));
 }
 
 VulkanHostVisibleBuffer::~VulkanHostVisibleBuffer() {
   if (host_addr) {
-    vkUnmapMemory(vk_buf.device_, vk_buf.memory);
+    vkUnmapMemory(vk_buf.device_, vk_buf.memory->memory_);
   }
 }
 
diff --git a/src/runtime/vulkan/vulkan_buffer.h b/src/runtime/vulkan/vulkan_buffer.h
index a3e37431e434..464829489f5c
 100644
--- a/src/runtime/vulkan/vulkan_buffer.h
+++ b/src/runtime/vulkan/vulkan_buffer.h
@@ -23,15 +23,17 @@
 #include
 #include
+#include <optional>
+#include <string>
 #include
 
+#include "vulkan_resource.h"
+
 namespace tvm {
 namespace runtime {
 namespace vulkan {
 
-class VulkanDevice;
-
-class VulkanBuffer {
+class VulkanBuffer : public VulkanResource {
  public:
   /* \brief Allocate memory on the device
    *
@@ -47,10 +49,11 @@ class VulkanBuffer {
    * an index to a compatible memory located in
    * VkPhysicalDeviceMemoryProperties.
    */
+
   VulkanBuffer(const VulkanDevice& device, size_t nbytes, VkBufferUsageFlags usage,
-               uint32_t mem_type_index);
+               uint32_t mem_type_index, std::optional<std::string> mem_scope = std::nullopt,
+               std::shared_ptr back_memory = nullptr);
 
-  //! \brief Destructor, deallocates the memory and buffer.
   ~VulkanBuffer();
 
   // Forbid copy assignment/constructor
@@ -61,6 +64,8 @@ class VulkanBuffer {
   VulkanBuffer(VulkanBuffer&&);
   VulkanBuffer& operator=(VulkanBuffer&&);
 
+  void AllocateMemory(const VkMemoryRequirements& mem_reqs, uint32_t mem_type_index);
+
  private:
   /*! \brief Whether this buffer should be allocated using dedicated
    * allocation
@@ -95,15 +100,11 @@ class VulkanBuffer {
    * VulkanDevice may be moved to a different location while the
    * VulkanBuffer is alive.
    */
-  VkDevice device_{VK_NULL_HANDLE};
 
   //! \brief Handle to the logical buffer on the device
   VkBuffer buffer{VK_NULL_HANDLE};
 
-  //! \brief Handle to the physical device memory
-  VkDeviceMemory memory{VK_NULL_HANDLE};
-
-  friend class VulkanHostVisibleBuffer;
+  size_t size{0};  // buffer size
 };
 
 /*! \brief A struct to represent Vulkan buffers backed by host visible memory */
@@ -124,7 +125,8 @@ class VulkanHostVisibleBuffer {
    * VkPhysicalDeviceMemoryProperties.
    */
   VulkanHostVisibleBuffer(const VulkanDevice& device, size_t nbytes, VkBufferUsageFlags usage,
-                          uint32_t mem_type_index);
+                          uint32_t mem_type_index,
+                          std::optional<std::string> mem_scope = std::nullopt);
 
   //! \brief Unmap memory and deallocate.
   ~VulkanHostVisibleBuffer();
diff --git a/src/runtime/vulkan/vulkan_device.cc b/src/runtime/vulkan/vulkan_device.cc
index cc39972432a3..e809d9951ff5 100644
--- a/src/runtime/vulkan/vulkan_device.cc
+++ b/src/runtime/vulkan/vulkan_device.cc
@@ -143,7 +143,9 @@ VulkanDeviceProperties::VulkanDeviceProperties(const VulkanInstance& instance,
   supported_subgroup_operations =
       (subgroup.supportedStages & VK_SHADER_STAGE_COMPUTE_BIT) ? subgroup.supportedOperations : 0;
 
+  timestamp_period = properties.properties.limits.timestampPeriod;
   max_num_threads = properties.properties.limits.maxComputeWorkGroupInvocations;
+  image_row_align = properties.properties.limits.optimalBufferCopyRowPitchAlignment;
 
   // Even if we can't query it, warp size must be at least 1.
// thread_warp_size = std::max(subgroup.subgroupSize, 1U); @@ -234,6 +236,12 @@ VulkanGetBufferMemoryRequirements2Functions::VulkanGetBufferMemoryRequirements2F vkGetDeviceProcAddr(device, "vkGetBufferMemoryRequirements2KHR")); } +VulkanGetImageMemoryRequirements2Functions::VulkanGetImageMemoryRequirements2Functions( + VkDevice device) { + vkGetImageMemoryRequirements2KHR = (PFN_vkGetImageMemoryRequirements2KHR)ICHECK_NOTNULL( + vkGetDeviceProcAddr(device, "vkGetImageMemoryRequirements2KHR")); +} + VulkanQueueInsertDebugUtilsLabelFunctions::VulkanQueueInsertDebugUtilsLabelFunctions( VkInstance instance) { vkQueueInsertDebugUtilsLabelEXT = (PFN_vkQueueInsertDebugUtilsLabelEXT)ICHECK_NOTNULL( diff --git a/src/runtime/vulkan/vulkan_device.h b/src/runtime/vulkan/vulkan_device.h index 0573a00e5c9e..4497afb018a1 100644 --- a/src/runtime/vulkan/vulkan_device.h +++ b/src/runtime/vulkan/vulkan_device.h @@ -33,6 +33,7 @@ #include "../thread_map.h" #include "vulkan/vulkan_core.h" #include "vulkan_buffer.h" +#include "vulkan_image.h" #include "vulkan_stream.h" namespace tvm { @@ -57,6 +58,12 @@ struct VulkanGetBufferMemoryRequirements2Functions { PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR{nullptr}; }; +struct VulkanGetImageMemoryRequirements2Functions { + explicit VulkanGetImageMemoryRequirements2Functions(VkDevice device); + + PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR{nullptr}; +}; + struct VulkanQueueInsertDebugUtilsLabelFunctions { explicit VulkanQueueInsertDebugUtilsLabelFunctions(VkInstance instance); @@ -96,16 +103,18 @@ struct VulkanDeviceProperties { uint32_t max_block_size_y{1}; uint32_t max_block_size_z{1}; uint32_t max_push_constants_size{128}; - uint32_t max_uniform_buffer_range{16384}; + uint32_t max_uniform_buffer_range{65536}; uint32_t max_storage_buffer_range{1 << 27}; uint32_t max_per_stage_descriptor_storage_buffer{4}; - uint32_t max_shared_memory_per_block{16384}; + uint32_t max_shared_memory_per_block{32768}; std::string device_type{"unknown_device_type"}; std::string device_name{"unknown_device_name"}; std::string driver_name{"unknown_driver_name"}; uint32_t driver_version{0}; uint32_t vulkan_api_version{VK_API_VERSION_1_0}; uint32_t max_spirv_version{0x10000}; + uint32_t image_row_align{0}; + float timestamp_period{0}; }; /*! 
 \brief Handle to the Vulkan API's VkDevice
@@ -219,6 +228,8 @@ class VulkanDevice {
   std::unique_ptr<VulkanDescriptorTemplateKHRFunctions> descriptor_template_khr_functions{nullptr};
   std::unique_ptr<VulkanGetBufferMemoryRequirements2Functions>
      get_buffer_memory_requirements_2_functions{nullptr};
+  std::unique_ptr<VulkanGetImageMemoryRequirements2Functions>
+      get_image_memory_requirements_2_functions{nullptr};
   std::unique_ptr<VulkanQueueInsertDebugUtilsLabelFunctions>
      queue_insert_debug_utils_label_functions{nullptr};
   // Memory type index for compute
@@ -308,6 +319,9 @@ uint32_t FindMemoryType(const VulkanDevice& device, VkBufferCreateInfo info,
 
 VkBufferCreateInfo MakeBufferCreateInfo(size_t nbytes, VkBufferUsageFlags usage);
 
+VkImageCreateInfo MakeImageCreateInfo(VkFormat format, uint32_t width, uint32_t height,
+                                      uint32_t layers, VkImageUsageFlags usage);
+
 }  // namespace vulkan
 }  // namespace runtime
 }  // namespace tvm
diff --git a/src/runtime/vulkan/vulkan_device_api.cc b/src/runtime/vulkan/vulkan_device_api.cc
index a2ff8bb7ce0e..0b025bc41c7f 100644
--- a/src/runtime/vulkan/vulkan_device_api.cc
+++ b/src/runtime/vulkan/vulkan_device_api.cc
@@ -22,17 +22,48 @@
 #include
 #include
+#include <memory>
+#include <optional>
+#include <string>
 #include
 #include
 #include
 #include
 
+#include "../memory/pooled_allocator.h"
+#include "vulkan_buffer.h"
 #include "vulkan_common.h"
+#include "vulkan_image.h"
+#include "vulkan_timer.h"
 
 namespace tvm {
 namespace runtime {
 namespace vulkan {
 
+using tvm::runtime::memory::Buffer;
+
+struct ImageInfo {
+  VkOffset3D origin;
+  VkExtent3D region;
+  uint32_t layer_count;
+};
+
+ImageInfo GetImageInfo(const VulkanImage* image, const DLTensor* tensor) {
+  ImageInfo info{};
+
+  ICHECK(tensor->dtype.lanes == 1) << "Image dtype has lanes: " << tensor->dtype.lanes;
+
+  info.origin = {0, 0, 0};
+  info.layer_count = 0;
+  size_t axis = DefaultTextureLayoutSeparator(tensor->ndim,
+                                              VulkanResource::ScopeFromMemoryLayout(image->layout));
+  auto texture_shape = ApplyTexture2DFlattening<int64_t>(tensor->shape, tensor->ndim, axis);
+  info.region = {static_cast<uint32_t>(texture_shape.width),
+                 static_cast<uint32_t>(texture_shape.height), 1};
+  info.layer_count = static_cast<uint32_t>(texture_shape.depth);
+  return info;
+}
+
 VulkanDeviceAPI* VulkanDeviceAPI::Global() {
   // Most of the TVM Global() functions allocate with "new" and do
   // not deallocate, as the OS can clean up any leftover buffers at
@@ -175,7 +206,8 @@ void VulkanDeviceAPI::GetAttr(Device dev, DeviceAttrKind kind, ffi::Any* rv) {
       // devices that support the VK_EXT_memory_budget extension.
       break;
     case kImagePitchAlignment:
-      return;
+      *rv = int64_t(prop.image_row_align);
+      break;
   }
 }
 
@@ -279,39 +311,153 @@ void VulkanDeviceAPI::GetTargetProperty(Device dev, const std::string& property,
   if (property == "max_spirv_version") {
     *rv = int64_t(prop.max_spirv_version);
   }
+  if (property == "image_row_align") {
+    *rv = int64_t(prop.image_row_align);
+  }
 }
 
-void* VulkanDeviceAPI::AllocDataSpace(Device dev, size_t nbytes, size_t alignment,
-                                      DLDataType type_hint) {
+size_t VulkanDeviceAPI::GetImageAlignment(Device dev) {
+  const auto& device = this->device(dev.device_id);
+  return device.device_properties.image_row_align;
+}
+
+size_t VulkanDeviceAPI::GetDataSize(const DLTensor& arr, ffi::Optional<ffi::String> mem_scope) {
+  if (!mem_scope.has_value() || mem_scope.value().empty() || mem_scope.value() == "global") {
+    return DeviceAPI::GetDataSize(arr);
+  }
+
+  uint32_t row_align = static_cast<uint32_t>(GetImageAlignment(arr.device));
+  std::vector<int64_t> shape;
+  shape.assign(arr.shape, arr.shape + arr.ndim);
+  return runtime::GetTextureMemorySize<std::vector<int64_t>>(shape, arr.dtype.bits, arr.dtype.lanes,
+                                                             mem_scope.value(), row_align);
+}
+
+static size_t GetMemObjectSize(Device dev, int ndim, const int64_t* shape, DLDataType dtype) {
+  DLTensor temp;
+  temp.data = nullptr;
+  temp.device = dev;
+  temp.ndim = ndim;
+  temp.dtype = dtype;
+  temp.shape = const_cast<int64_t*>(shape);
+  temp.strides = nullptr;
+  temp.byte_offset = 0;
+  size_t size = DeviceAPI::Get(dev)->GetDataSize(temp);
+  return size;
+}
+
+void* VulkanDeviceAPI::AllocVulkanBuffer(Device dev, size_t nbytes, DLDataType type_hint,
+                                         std::shared_ptr memory) {
   if (nbytes == 0) {
     // Vulkan seems to have issues if we return nullptr on zero size alloc
     nbytes = 1;
   }
+
+  // For a standard buffer allocation, use the default layout (1D Buffer)
+  auto mem_scope = std::optional<std::string>("global");
+
   const auto& device = this->device(dev.device_id);
   auto usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
-  return new VulkanBuffer(device, nbytes, usage, device.compute_mtype_index);
+
+  return new VulkanBuffer(device, nbytes, usage, device.compute_mtype_index, mem_scope, memory);
 }
 
-void VulkanDeviceAPI::FreeDataSpace(Device dev, void* ptr) {
-  // Before releasing the vkBuffer, call sync to
-  // finish all the vulkan commands that reference the buffer.
+void* VulkanDeviceAPI::AllocVulkanImage(Device dev, size_t width, size_t height, size_t layers,
+                                        DLDataType type_hint, ffi::Optional<ffi::String> mem_scope,
+                                        std::shared_ptr memory) {
+  const auto& device = this->device(dev.device_id);
+  auto format = DTypeToVulkanFormat(type_hint);  // Use the new function to get the format
+  auto usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
+               VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+
+  // image and view creation
+  VulkanImage* image = new VulkanImage(device, format, width, height, layers, usage,
+                                       device.compute_mtype_index, mem_scope.value(), memory);
+  image->CreateImageView(format);
+  return image;
+}
+
+void* VulkanDeviceAPI::AllocDataSpace(Device dev, size_t nbytes, size_t alignment,
+                                      DLDataType type_hint) {
+  return AllocVulkanBuffer(dev, nbytes, type_hint, nullptr);
+}
+
+void* VulkanDeviceAPI::AllocDataSpace(Device dev, size_t width, size_t height, size_t depth,
+                                      DLDataType type_hint, ffi::Optional<ffi::String> mem_scope) {
+  if (!mem_scope.has_value()) {
+    mem_scope = ffi::String("global.texture");
+  }
+  return AllocVulkanImage(dev, width, height, depth, type_hint, mem_scope, nullptr);
+}
+
+void* VulkanDeviceAPI::AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype,
+                                      ffi::Optional<ffi::String> mem_scope) {
+  if (!mem_scope.has_value() || mem_scope.value().empty() || mem_scope.value() == "global") {
+    size_t size = GetMemObjectSize(dev, ndim, shape, dtype);
+    auto buf = MemoryManager::GetOrCreateAllocator(dev, AllocatorType::kPooled)
+                   ->Alloc(dev, size, kTempAllocaAlignment, dtype);
+    return buf.data;
+  }
+
+  size_t axis = DefaultTextureLayoutSeparator(ndim, mem_scope.value());
+  auto texture = ApplyTexture2DFlattening<int64_t>(shape, ndim, axis);
+
+  return AllocDataSpace(dev, texture.width, texture.height, texture.depth, dtype, mem_scope);
+}
+
+void* VulkanDeviceAPI::AllocDataSpaceView(Device dev, void* data, ffi::Shape shape,
+                                          DLDataType dtype, ffi::Optional<ffi::String> mem_scope) {
+  const auto* res = static_cast<const VulkanResource*>(data);
+
+  if (!mem_scope.has_value() || mem_scope.value().empty() || mem_scope.value() == "global") {
+    size_t nbytes = GetMemObjectSize(dev, shape.size(), shape.data(), dtype);
+    return AllocVulkanBuffer(dev, nbytes, dtype, res->memory);
+  }
+  size_t axis = DefaultTextureLayoutSeparator(shape.size(), mem_scope.value());
+  auto texture = ApplyTexture2DFlattening<int64_t>(shape.data(), shape.size(), axis);
+  return AllocVulkanImage(dev, texture.width, texture.height, texture.depth, dtype, mem_scope,
+                          res->memory);
+}
+
+void VulkanDeviceAPI::FreeDataSpaceView(Device dev, void* ptr) {
   StreamSync(dev, nullptr);
+  const auto* res = static_cast<const VulkanResource*>(ptr);
 
-  auto* pbuf = static_cast<VulkanBuffer*>(ptr);
-  delete pbuf;
+  if (const auto* buf_res = dynamic_cast<const VulkanBuffer*>(res)) {
+    delete buf_res;
+  } else if (const auto* img_res = dynamic_cast<const VulkanImage*>(res)) {
+    delete img_res;
+  }
+}
+
+void VulkanDeviceAPI::FreeDataSpace(Device dev, void* ptr) {
+  // Get Vulkan stream associated with the device
+  VulkanStream& stream = device(dev.device_id).ThreadLocalStream();
+  const auto* res = static_cast<const VulkanResource*>(ptr);
+
+  if (const auto* buf_res = dynamic_cast<const VulkanBuffer*>(res)) {
+    // Defer buffer destruction by scheduling it in VulkanStream
+    stream.Launch([buf_res](VulkanStreamState* state) { delete buf_res; });
+  } else if (const auto* img_res = dynamic_cast<const VulkanImage*>(res)) {
+    // Defer image destruction in VulkanStream
+    stream.Launch([img_res](VulkanStreamState* state) { delete img_res; });
+  }
+}
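From Python, these overloads are reached through the regular NDArray allocation path; a hedged sketch (the shapes are illustrative, and the 5-D layout with a trailing dimension of 4 mirrors the existing Adreno/OpenCL texture convention, where the last axis maps to the RGBA channels of the image):

```python
import tvm

dev = tvm.vulkan(0)
# Default scope: a plain 1-D VkBuffer allocation.
a = tvm.nd.empty((64, 64), "float32", dev)
# Texture scope: routed to the VkImage path via the n-d AllocDataSpace overload.
b = tvm.nd.empty((1, 16, 16, 16, 4), "float32", dev, mem_scope="global.texture")
```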
 
 void* VulkanDeviceAPI::AllocWorkspace(Device dev, size_t size, DLDataType type_hint) {
-  auto& pool = pool_per_thread.GetOrMake(kDLVulkan, this);
-  return pool.AllocWorkspace(dev, size);
+  // Use MemoryManager to allocate workspace memory.
+  auto buffer = MemoryManager::GetOrCreateAllocator(dev, AllocatorType::kPooled)
+                    ->Alloc(dev, size, kTempAllocaAlignment, type_hint);
+  return buffer.data;
 }
 
 void VulkanDeviceAPI::FreeWorkspace(Device dev, void* data) {
-  auto* pool = pool_per_thread.Get();
-  ICHECK(pool) << "Attempted to free a vulkan workspace on a CPU-thread "
-               << "that has never allocated a workspace";
-  pool->FreeWorkspace(dev, data);
+  // Use MemoryManager to free workspace memory.
+  Allocator* allocator = MemoryManager::GetAllocator(dev, AllocatorType::kPooled);
+  Buffer buffer;
+  buffer.data = data;
+  allocator->Free(buffer);
 }
 
 TVMStreamHandle VulkanDeviceAPI::CreateStream(Device dev) { return nullptr; }
@@ -332,33 +478,99 @@ void VulkanDeviceAPI::StreamSync(Device dev, TVMStreamHandle stream) {
   device(dev.device_id).ThreadLocalStream().Synchronize();
 }
 
-void VulkanDeviceAPI::CopyDataFromTo(const void* from, size_t from_offset, void* to,
-                                     size_t to_offset, size_t size, Device dev_from, Device dev_to,
-                                     DLDataType type_hint, TVMStreamHandle stream) {
+void VulkanDeviceAPI::SetStream(Device dev, TVMStreamHandle stream) {
+  ICHECK_EQ(stream, static_cast<TVMStreamHandle>(nullptr));
+}
+
+TVMStreamHandle VulkanDeviceAPI::GetCurrentStream(Device dev) { return nullptr; }
+
+void VulkanDeviceAPI::CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream) {
   ICHECK(stream == nullptr);
-  Device dev = dev_from;
-  if (dev_from.device_type == kDLCPU) {
-    dev = dev_to;
-  }
+  ICHECK(from->device.device_type == kDLVulkan || from->device.device_type == kDLCPU);
+  ICHECK(to->device.device_type == kDLVulkan || to->device.device_type == kDLCPU);
+
+  size_t nbytes = GetDataSize(*from);
+  ICHECK_EQ(nbytes, GetDataSize(*to));
+  ICHECK(IsContiguous(*from) && IsContiguous(*to))
+      << "CopyDataFromTo only supports contiguous array for now";
+
+  Device dev_from = from->device;
+  Device dev_to = to->device;
+  const auto* from_res = static_cast<const VulkanResource*>(from->data);
+  const auto* to_res = static_cast<const VulkanResource*>(to->data);
 
   int from_dev_type = static_cast<int>(dev_from.device_type);
   int to_dev_type = static_cast<int>(dev_to.device_type);
 
   if (from_dev_type == kDLVulkan && to_dev_type == kDLVulkan) {
     ICHECK_EQ(dev_from.device_id, dev_to.device_id)
        << "The Vulkan runtime does not support deviceA to deviceB copies.
" << "This should be changed to a deviceA to CPU copy, followed by a CPU to deviceB copy"; device(dev_from.device_id).ThreadLocalStream().Launch([=](VulkanStreamState* state) { - // 1: copy - const auto* from_buf = static_cast(from); - auto* to_buf = static_cast(to); - VkBufferCopy copy_info; - copy_info.srcOffset = from_offset; - copy_info.dstOffset = to_offset; - copy_info.size = size; - vkCmdCopyBuffer(state->cmd_buffer_, from_buf->buffer, to_buf->buffer, 1, ©_info); - // 2: barrier(transfer-> compute|transfer) - VkMemoryBarrier barrier_info; + // Buffer to Buffer Copy + if (const auto* from_buf = dynamic_cast(from_res)) { + if (const auto* to_buf = dynamic_cast(to_res)) { + VkBufferCopy copy_info = {}; + copy_info.srcOffset = from->byte_offset; + copy_info.dstOffset = to->byte_offset; + copy_info.size = nbytes; + vkCmdCopyBuffer(state->cmd_buffer_, from_buf->buffer, to_buf->buffer, 1, ©_info); + } else if (const auto* to_img = dynamic_cast(to_res)) { + auto image_info = GetImageInfo(to_img, to); + + VkBufferImageCopy copy_info = {}; + copy_info.bufferOffset = from->byte_offset; + copy_info.bufferRowLength = 0; + copy_info.bufferImageHeight = 0; + copy_info.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copy_info.imageSubresource.mipLevel = 0; + copy_info.imageSubresource.baseArrayLayer = 0; + copy_info.imageSubresource.layerCount = image_info.layer_count; + copy_info.imageOffset = {0, 0, 0}; + copy_info.imageExtent = image_info.region; + vkCmdCopyBufferToImage(state->cmd_buffer_, from_buf->buffer, to_img->image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©_info); + } + } else if (const auto* from_img = dynamic_cast(from_res)) { + if (const auto* to_buf = dynamic_cast(to_res)) { + auto image_info = GetImageInfo(from_img, from); + + VkBufferImageCopy copy_info = {}; + copy_info.bufferOffset = to->byte_offset; + copy_info.bufferRowLength = 0; + copy_info.bufferImageHeight = 0; + copy_info.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copy_info.imageSubresource.mipLevel = 0; + copy_info.imageSubresource.baseArrayLayer = 0; + copy_info.imageSubresource.layerCount = image_info.layer_count; + copy_info.imageOffset = {0, 0, 0}; + copy_info.imageExtent = image_info.region; + vkCmdCopyImageToBuffer(state->cmd_buffer_, from_img->image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, to_buf->buffer, 1, + ©_info); + } else if (const auto* to_img = dynamic_cast(to_res)) { + auto image_info = GetImageInfo(from_img, from); + + VkImageCopy copy_info = {}; + copy_info.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copy_info.srcSubresource.mipLevel = 0; + copy_info.srcSubresource.baseArrayLayer = 0; + copy_info.srcSubresource.layerCount = image_info.layer_count; + copy_info.srcOffset = {0, 0, 0}; + copy_info.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copy_info.dstSubresource.mipLevel = 0; + copy_info.dstSubresource.baseArrayLayer = 0; + copy_info.dstSubresource.layerCount = image_info.layer_count; + copy_info.dstOffset = {0, 0, 0}; + copy_info.extent = image_info.region; + vkCmdCopyImage(state->cmd_buffer_, from_img->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + to_img->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©_info); + } + } + + // Memory barrier to ensure proper synchronization + VkMemoryBarrier barrier_info = {}; barrier_info.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; barrier_info.pNext = nullptr; barrier_info.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; @@ -370,43 +582,93 @@ void VulkanDeviceAPI::CopyDataFromTo(const void* from, size_t from_offset, 
 void*
     });
   } else if (from_dev_type == kDLVulkan && to_dev_type == kDLCPU) {
-    const auto* from_buf = static_cast<const VulkanBuffer*>(from);
     auto& device = this->device(dev_from.device_id);
     auto& stream = device.ThreadLocalStream();
-    auto& staging_buffer = device.ThreadLocalStagingBuffer(size);
+    auto& staging_buffer = device.ThreadLocalStagingBuffer(nbytes);
+
     stream.Launch([&](VulkanStreamState* state) {
-      VkBufferCopy copy_info;
-      copy_info.srcOffset = from_offset;
-      copy_info.dstOffset = 0;
-      copy_info.size = size;
-      vkCmdCopyBuffer(state->cmd_buffer_, from_buf->buffer, staging_buffer.vk_buf.buffer, 1,
-                      &copy_info);
+      if (const auto* from_buf = dynamic_cast<const VulkanBuffer*>(from_res)) {
+        VkBufferCopy copy_info = {};
+        copy_info.srcOffset = from->byte_offset;
+        copy_info.dstOffset = 0;
+        copy_info.size = nbytes;
+        vkCmdCopyBuffer(state->cmd_buffer_, from_buf->buffer, staging_buffer.vk_buf.buffer, 1,
+                        &copy_info);
+      } else if (const auto* from_img = dynamic_cast<const VulkanImage*>(from_res)) {
+        auto image_info = GetImageInfo(from_img, from);
+
+        // Ensure the image is in the correct layout for transfer
+        VkImageMemoryBarrier img_barrier = {};
+        img_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+        img_barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;  // steady-state layout for storage images
+        img_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
+        img_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        img_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        img_barrier.image = from_img->image;
+        img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+        img_barrier.subresourceRange.baseMipLevel = 0;
+        img_barrier.subresourceRange.levelCount = 1;
+        img_barrier.subresourceRange.baseArrayLayer = 0;
+        img_barrier.subresourceRange.layerCount = image_info.layer_count;
+        img_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
+        img_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+
+        vkCmdPipelineBarrier(state->cmd_buffer_, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                             VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1,
+                             &img_barrier);
+        VkBufferImageCopy copy_info = {};
+        copy_info.bufferOffset = 0;
+        copy_info.bufferRowLength = 0;
+        copy_info.bufferImageHeight = 0;
+        copy_info.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+        copy_info.imageSubresource.mipLevel = 0;
+        copy_info.imageSubresource.baseArrayLayer = 0;
+        copy_info.imageSubresource.layerCount = image_info.layer_count;
+        copy_info.imageOffset = {0, 0, 0};
+        copy_info.imageExtent = image_info.region;
+        vkCmdCopyImageToBuffer(state->cmd_buffer_, from_img->image,
+                               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, staging_buffer.vk_buf.buffer,
+                               1, &copy_info);
+
+        // Restore the image layout
+        img_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
+        img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
+        img_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+        img_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+        vkCmdPipelineBarrier(state->cmd_buffer_, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                             VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1,
+                             &img_barrier);
+      }
     });
+
     stream.Synchronize();
     stream.ProfilerReset();
     if (!device.coherent_staging) {
       VkMappedMemoryRange mrange;
       mrange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
       mrange.pNext = nullptr;
-      mrange.memory = staging_buffer.vk_buf.memory;
+      mrange.memory = staging_buffer.vk_buf.memory->memory_;
       mrange.offset = 0;
-      mrange.size = VK_WHOLE_SIZE;  // size;
+      mrange.size = VK_WHOLE_SIZE;
       VULKAN_CALL(vkInvalidateMappedMemoryRanges(device, 1, &mrange));
     }
-    memcpy(static_cast<char*>(to) + to_offset,
           static_cast<const char*>(staging_buffer.host_addr), size);
+    memcpy(static_cast<char*>(to->data) + to->byte_offset,
+           static_cast<const char*>(staging_buffer.host_addr), nbytes);
+
   } else if (from_dev_type == kDLCPU && to_dev_type == kDLVulkan) {
     auto& device = this->device(dev_to.device_id);
     auto& stream = device.ThreadLocalStream();
-    const auto* to_buf = static_cast<const VulkanBuffer*>(to);
-    auto& staging_buffer = device.ThreadLocalStagingBuffer(size);
-    memcpy(staging_buffer.host_addr, static_cast<const char*>(from) + from_offset, size);
+    auto& staging_buffer = device.ThreadLocalStagingBuffer(nbytes);
+    memcpy(staging_buffer.host_addr, static_cast<const char*>(from->data) + from->byte_offset,
+           nbytes);
+
     // host side flush if access is not coherent.
     // so writes from CPU is visible to GPU
     if (!device.coherent_staging) {
       VkMappedMemoryRange mrange;
       mrange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
       mrange.pNext = nullptr;
-      mrange.memory = staging_buffer.vk_buf.memory;
+      mrange.memory = staging_buffer.vk_buf.memory->memory_;
       mrange.offset = 0;
       mrange.size = VK_WHOLE_SIZE;
       VULKAN_CALL(vkFlushMappedMemoryRanges(device, 1, &mrange));
@@ -422,19 +684,35 @@ void VulkanDeviceAPI::CopyDataFromTo(const void* from, size_t from_offset, void*
       vkCmdPipelineBarrier(state->cmd_buffer_, VK_PIPELINE_STAGE_HOST_BIT,
                            VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &barrier_info, 0, nullptr, 0,
                            nullptr);
-      // 1: copy
-      VkBufferCopy copy_info;
-      copy_info.srcOffset = 0;
-      copy_info.dstOffset = to_offset;
-      copy_info.size = size;
-      vkCmdCopyBuffer(state->cmd_buffer_, staging_buffer.vk_buf.buffer, to_buf->buffer, 1,
-                      &copy_info);
+
+      if (const auto* to_buf = dynamic_cast<const VulkanBuffer*>(to_res)) {
+        VkBufferCopy copy_info;
+        copy_info.srcOffset = 0;
+        copy_info.dstOffset = to->byte_offset;
+        copy_info.size = nbytes;
+        vkCmdCopyBuffer(state->cmd_buffer_, staging_buffer.vk_buf.buffer, to_buf->buffer, 1,
+                        &copy_info);
+      } else if (const auto* to_img = dynamic_cast<const VulkanImage*>(to_res)) {
+        auto image_info = GetImageInfo(to_img, to);
+
+        VkBufferImageCopy copy_info = {};
+        copy_info.bufferOffset = 0;
+        copy_info.bufferRowLength = 0;
+        copy_info.bufferImageHeight = 0;
+        copy_info.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+        copy_info.imageSubresource.mipLevel = 0;
+        copy_info.imageSubresource.baseArrayLayer = 0;
+        copy_info.imageSubresource.layerCount = image_info.layer_count;
+        copy_info.imageOffset = {0, 0, 0};
+        copy_info.imageExtent = image_info.region;
+        vkCmdCopyBufferToImage(state->cmd_buffer_, staging_buffer.vk_buf.buffer, to_img->image,
+                               VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_info);
+      }
     });
     stream.ProfilerReady();
-    // TODO(tulloch): should we instead make the staging buffer a property of the
-    // Stream?  This would allow us to elide synchronizations here.
     stream.Synchronize();
+
   } else {
     LOG(FATAL) << "Expect copy from/to Vulkan or between Vulkan"
               << ", from=" << from_dev_type << ", to=" << to_dev_type;
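The CPU-to-GPU branch above is the classic staging-buffer pattern; reduced to its essential ordering, a sketch with hypothetical helper parameters rather than the runtime's actual interface:

```cpp
#include <vulkan/vulkan.h>

#include <cstring>

// Staging upload, reduced to its essential ordering. The flush for
// non-coherent memory (step 2) is omitted; `cmd` is assumed to be recording.
void StageUpload(VkCommandBuffer cmd, void* mapped, const void* src, size_t nbytes,
                 VkBuffer staging, VkBuffer dst) {
  memcpy(mapped, src, nbytes);  // 1: write into the mapped staging buffer
  VkMemoryBarrier barrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
  barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;  // 3: make host writes visible
  barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
  vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1,
                       &barrier, 0, nullptr, 0, nullptr);
  VkBufferCopy copy{0, 0, nbytes};  // 4: copy staging -> device-local destination
  vkCmdCopyBuffer(cmd, staging, dst, 1, &copy);
}
```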
@@ -459,13 +737,148 @@ TVM_FFI_STATIC_INIT_BLOCK() {
         DeviceAPI* ptr = VulkanDeviceAPI::Global();
         *rv = static_cast<void*>(ptr);
       })
-      .def("device_api.vulkan.get_target_property", [](Device dev, const std::string& property) {
-        ffi::Any rv;
-        VulkanDeviceAPI::Global()->GetTargetProperty(dev, property, &rv);
-        return rv;
+      .def("device_api.vulkan.get_target_property",
+           [](Device dev, const std::string& property) {
+             ffi::Any rv;
+             VulkanDeviceAPI::Global()->GetTargetProperty(dev, property, &rv);
+             return rv;
+           })
+      .def_packed("device_api.vulkan.alloc_nd",
+                  [](ffi::PackedArgs args, ffi::Any* rv) {
+                    int32_t device_type = args[0].cast<int32_t>();
+                    int32_t device_id = args[1].cast<int32_t>();
+                    int32_t dtype_code_hint = args[2].cast<int32_t>();
+                    int32_t dtype_bits_hint = args[3].cast<int32_t>();
+                    std::string scope = args[4].cast<std::string>();
+
+                    CHECK(scope.find("texture") != std::string::npos);
+                    int64_t ndim = args[5].cast<int64_t>();
+                    CHECK_EQ(ndim, 3);
+                    int64_t* shape = static_cast<int64_t*>(args[6].cast<void*>());
+                    int64_t width = shape[0];
+                    int64_t height = shape[1];
+                    int64_t depth = shape[2];
+
+                    Device dev;
+                    dev.device_type = static_cast<DLDeviceType>(device_type);
+                    dev.device_id = device_id;
+
+                    DLDataType type_hint;
+                    type_hint.code = static_cast<uint8_t>(dtype_code_hint);
+                    type_hint.bits = static_cast<uint8_t>(dtype_bits_hint);
+                    type_hint.lanes = 1;
+
+                    *rv = VulkanDeviceAPI::Global()->AllocDataSpace(
+                        dev, static_cast<size_t>(width), static_cast<size_t>(height),
+                        static_cast<size_t>(depth), type_hint,
+                        ffi::Optional<ffi::String>("global.texture"));
+                  })
+      .def_packed("device_api.vulkan.free_nd", [](ffi::PackedArgs args, ffi::Any* rv) {
+        int32_t device_type = args[0].cast<int32_t>();
+        int32_t device_id = args[1].cast<int32_t>();
+        std::string scope = args[2].cast<std::string>();
+        CHECK(scope.find("texture") != std::string::npos);
+        void* data = args[3].cast<void*>();
+        Device dev;
+        dev.device_type = static_cast<DLDeviceType>(device_type);
+        dev.device_id = device_id;
+        VulkanDeviceAPI::Global()->FreeDataSpace(dev, data);
+        *rv = static_cast<int32_t>(0);
       });
 }
 
+TVM_FFI_STATIC_INIT_BLOCK() {
+  namespace refl = tvm::ffi::reflection;
+  refl::GlobalDef().def("profiling.timer.vulkan",
+                        [](Device dev) { return Timer(ffi::make_object(dev)); });
+}
+
+class VulkanPooledAllocator final : public memory::PooledAllocator {
+ public:
+  explicit VulkanPooledAllocator() : PooledAllocator() {}
+
+  bool AllowMemoryScope(const std::string& mem_scope) const final {
+    return ((mem_scope.find("texture") != std::string::npos) || mem_scope.empty() ||
+            ("global" == mem_scope));
+  }
+
+  Buffer Alloc(Device dev, size_t nbytes, size_t alignment, DLDataType type_hint) override {
+    std::lock_guard<std::recursive_mutex> lock(mu_);
+    size_t size = ((nbytes + page_size_ - 1) / page_size_) * page_size_;
+    auto&& it = memory_pool_.find(size);
+    if (it != memory_pool_.end() && !it->second.empty()) {
+      auto&& pool = it->second;
+      auto ret = pool.back();
+      pool.pop_back();
+      return ret;
+    }
+    Buffer buf;
+    buf.device = dev;
+    buf.size = size;
+    buf.alloc_type = AllocatorType::kPooled;
+    try {
+      buf.data = DeviceAllocDataSpace(dev, size, alignment, type_hint);
+    } catch (InternalError& err) {
+      LOG(WARNING) << "PooledAllocator got InternalError during allocation: " << err.message();
+      LOG(WARNING) << "Trying to release all unused memory and reallocate...";
+      ReleaseAll();
+      buf.data = DeviceAllocDataSpace(dev, size, alignment, type_hint);
+    }
+
+    used_memory_.fetch_add(size, std::memory_order_relaxed);
+    VLOG(1) << "allocate " << size << " B, used memory " << used_memory_ << "
B"; + return buf; + } + + Buffer Alloc(Device dev, ffi::Shape shape, DLDataType type_hint, + const std::string& mem_scope) override { + if (AllowMemoryScope(mem_scope)) { + size_t size = GetMemObjectSize(dev, shape.size(), shape.data(), type_hint); + Buffer buf; + buf.device = dev; + buf.size = size; + buf.alloc_type = AllocatorType::kPooled; + buf.data = DeviceAPI::Get(dev)->AllocDataSpace(dev, shape.size(), shape.data(), type_hint, + ffi::String(mem_scope)); + if (mem_scope.find("texture") == std::string::npos) { + // All textures are backed by buffers - don't count in total memory + used_memory_.fetch_add(size, std::memory_order_relaxed); + } + DLOG(INFO) << "allocate " << size << " B, used memory " << used_memory_ << " B"; + return buf; + } + LOG(FATAL) << "Unsupported memory scope for this Allocator:" << mem_scope; + return {}; + } + + void Free(const Buffer& buffer) override { + std::lock_guard lock(mu_); + if (memory_pool_.find(buffer.size) == memory_pool_.end()) { + memory_pool_.emplace(buffer.size, std::vector{}); + } + memory_pool_.at(buffer.size).push_back(buffer); + VLOG(1) << "reclaim buffer " << buffer.size; + } + + void* CreateView(const Buffer& buffer, ffi::Shape shape, DLDataType type_hint, + const std::string& mem_scope) final { + return VulkanDeviceAPI::Global()->AllocDataSpaceView( + buffer.device, buffer.data, shape, type_hint, ffi::Optional(mem_scope)); + } + + void FreeView(Device dev, void* data) final { + return VulkanDeviceAPI::Global()->FreeDataSpaceView(dev, data); + } +}; + +TVM_FFI_STATIC_INIT_BLOCK() { + namespace refl = tvm::ffi::reflection; + refl::GlobalDef().def_packed("DeviceAllocator.vulkan", [](ffi::PackedArgs args, ffi::Any* rv) { + Allocator* alloc = new VulkanPooledAllocator(); + *rv = static_cast(alloc); + }); +} + } // namespace vulkan } // namespace runtime } // namespace tvm diff --git a/src/runtime/vulkan/vulkan_device_api.h b/src/runtime/vulkan/vulkan_device_api.h index 5e9bfeb8c086..b286b2766d37 100644 --- a/src/runtime/vulkan/vulkan_device_api.h +++ b/src/runtime/vulkan/vulkan_device_api.h @@ -21,11 +21,15 @@ #define TVM_RUNTIME_VULKAN_VULKAN_DEVICE_API_H_ #include +#include +#include #include +#include #include #include +#include "../texture.h" #include "../thread_map.h" #include "../workspace_pool.h" #include "vulkan/vulkan_core.h" @@ -47,8 +51,22 @@ class VulkanDeviceAPI final : public DeviceAPI { void GetAttr(Device dev, DeviceAttrKind kind, ffi::Any* rv) final; // Implement memory management required by DeviceAPI + void* AllocVulkanBuffer(Device dev, size_t nbytes, DLDataType type_hint, + std::shared_ptr memory); + void* AllocVulkanImage(Device dev, size_t width, size_t height, size_t layers, + DLDataType type_hint, ffi::Optional mem_scope, + std::shared_ptr memory); void* AllocDataSpace(Device dev, size_t nbytes, size_t alignment, DLDataType type_hint) final; + void* AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype, + ffi::Optional mem_scope = std::nullopt) final; + void* AllocDataSpace(Device dev, size_t width, size_t height, size_t depth, DLDataType type_hint, + ffi::Optional mem_scope = std::nullopt); + void* AllocDataSpaceView(Device dev, void* data, ffi::Shape shape, DLDataType dtype, + ffi::Optional mem_scope = std::nullopt); + void FreeDataSpace(Device dev, void* ptr) final; + void FreeDataSpaceView(Device dev, void* ptr); + void* AllocWorkspace(Device dev, size_t size, DLDataType type_hint) final; void FreeWorkspace(Device dev, void* data) final; @@ -61,11 +79,62 @@ class VulkanDeviceAPI final : 
diff --git a/src/runtime/vulkan/vulkan_device_api.h b/src/runtime/vulkan/vulkan_device_api.h
index 5e9bfeb8c086..b286b2766d37 100644
--- a/src/runtime/vulkan/vulkan_device_api.h
+++ b/src/runtime/vulkan/vulkan_device_api.h
@@ -21,11 +21,15 @@
 #define TVM_RUNTIME_VULKAN_VULKAN_DEVICE_API_H_
 
 #include
+#include <memory>
+#include <optional>
 #include
+#include <string>
 #include
 #include
 
+#include "../texture.h"
 #include "../thread_map.h"
 #include "../workspace_pool.h"
 #include "vulkan/vulkan_core.h"
@@ -47,8 +51,22 @@ class VulkanDeviceAPI final : public DeviceAPI {
   void GetAttr(Device dev, DeviceAttrKind kind, ffi::Any* rv) final;
 
   // Implement memory management required by DeviceAPI
+  void* AllocVulkanBuffer(Device dev, size_t nbytes, DLDataType type_hint,
+                          std::shared_ptr memory);
+  void* AllocVulkanImage(Device dev, size_t width, size_t height, size_t layers,
+                         DLDataType type_hint, ffi::Optional<ffi::String> mem_scope,
+                         std::shared_ptr memory);
   void* AllocDataSpace(Device dev, size_t nbytes, size_t alignment, DLDataType type_hint) final;
+  void* AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype,
+                       ffi::Optional<ffi::String> mem_scope = std::nullopt) final;
+  void* AllocDataSpace(Device dev, size_t width, size_t height, size_t depth, DLDataType type_hint,
+                       ffi::Optional<ffi::String> mem_scope = std::nullopt);
+  void* AllocDataSpaceView(Device dev, void* data, ffi::Shape shape, DLDataType dtype,
+                           ffi::Optional<ffi::String> mem_scope = std::nullopt);
+
   void FreeDataSpace(Device dev, void* ptr) final;
+  void FreeDataSpaceView(Device dev, void* ptr);
+
   void* AllocWorkspace(Device dev, size_t size, DLDataType type_hint) final;
   void FreeWorkspace(Device dev, void* data) final;
@@ -61,11 +79,62 @@ class VulkanDeviceAPI final : public DeviceAPI {
   void FreeStream(Device dev, TVMStreamHandle stream) final;
   void SyncStreamFromTo(Device dev, TVMStreamHandle event_src, TVMStreamHandle event_dst) final;
   void StreamSync(Device dev, TVMStreamHandle stream) final;
-
- protected:
-  void CopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset, size_t size,
-                      Device dev_from, Device dev_to, DLDataType type_hint,
-                      TVMStreamHandle stream) final;
+  void SetStream(Device dev, TVMStreamHandle stream) final;
+  TVMStreamHandle GetCurrentStream(Device dev) final;
+  size_t GetDataSize(const DLTensor& arr,
+                     ffi::Optional<ffi::String> mem_scope = std::nullopt) final;
+
+  void CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream) final;
+
+  // Check if the device is a Vulkan device
+  virtual bool IsVulkanDevice(Device dev) { return dev.device_type == kDLVulkan; }
+
+  inline VkFormat DTypeToVulkanFormat(DLDataType data_type, int num_channels = 4) {
+    DataType dtype(data_type);
+
+    if (num_channels == 1) {
+      if (dtype == DataType::Float(32)) {
+        return VK_FORMAT_R32_SFLOAT;
+      } else if (dtype == DataType::Float(16)) {
+        return VK_FORMAT_R16_SFLOAT;
+      } else if (dtype == DataType::Int(8)) {
+        return VK_FORMAT_R8_SINT;
+      } else if (dtype == DataType::Int(16)) {
+        return VK_FORMAT_R16_SINT;
+      } else if (dtype == DataType::Int(32)) {
+        return VK_FORMAT_R32_SINT;
+      } else if (dtype == DataType::UInt(8)) {
+        return VK_FORMAT_R8_UINT;
+      } else if (dtype == DataType::UInt(16)) {
+        return VK_FORMAT_R16_UINT;
+      } else if (dtype == DataType::UInt(32)) {
+        return VK_FORMAT_R32_UINT;
+      }
+    } else if (num_channels == 4) {
+      if (dtype == DataType::Float(32)) {
+        return VK_FORMAT_R32G32B32A32_SFLOAT;  // 4-channel 32-bit float
+      } else if (dtype == DataType::Float(16)) {
+        return VK_FORMAT_R16G16B16A16_SFLOAT;  // 4-channel 16-bit float
+      } else if (dtype == DataType::Int(8)) {
+        return VK_FORMAT_R8G8B8A8_SINT;  // 4-channel 8-bit signed integer
+      } else if (dtype == DataType::Int(16)) {
+        return VK_FORMAT_R16G16B16A16_SINT;  // 4-channel 16-bit signed integer
+      } else if (dtype == DataType::Int(32)) {
+        return VK_FORMAT_R32G32B32A32_SINT;  // 4-channel 32-bit signed integer
+      } else if (dtype == DataType::UInt(8)) {
+        return VK_FORMAT_R8G8B8A8_UINT;  // 4-channel 8-bit unsigned integer
+      } else if (dtype == DataType::UInt(16)) {
+        return VK_FORMAT_R16G16B16A16_UINT;  // 4-channel 16-bit unsigned integer
+      } else if (dtype == DataType::UInt(32)) {
+        return VK_FORMAT_R32G32B32A32_UINT;  // 4-channel 32-bit unsigned integer
+      }
+    }
+    LOG(FATAL) << "Unsupported data type or channel count for Vulkan runtime: " << dtype
+               << ", channels: " << num_channels;
+    return VK_FORMAT_UNDEFINED;  // Fallback, should not reach here
+  }
 
   // End of required methods for the DeviceAPI interface
@@ -107,6 +176,8 @@ class VulkanDeviceAPI final : public DeviceAPI {
    */
   void GetTargetProperty(Device dev, const std::string& property, ffi::Any* rv) final;
 
+  size_t GetImageAlignment(Device dev);
+
  private:
   std::vector<uint32_t> GetComputeQueueFamilies(VkPhysicalDevice phy_dev);
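The new properties are observable from Python through the packed function registered earlier in this patch; a small sketch (reported values are device dependent):

```python
import tvm

dev = tvm.vulkan(0)
get_prop = tvm.get_global_func("device_api.vulkan.get_target_property")
# Row-pitch alignment used when sizing texture (VkImage) allocations.
print(get_prop(dev, "image_row_align"))
print(get_prop(dev, "max_spirv_version"))
```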
diff --git a/src/runtime/vulkan/vulkan_image.cc b/src/runtime/vulkan/vulkan_image.cc
new file mode 100644
index 000000000000..8a6f731a2bdb
--- /dev/null
+++ b/src/runtime/vulkan/vulkan_image.cc
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "vulkan_image.h"
+
+#include <memory>
+#include <optional>
+#include <string>
+
+#include "vulkan_device_api.h"
+
+namespace tvm {
+namespace runtime {
+namespace vulkan {
+
+VkImageCreateInfo MakeImageCreateInfo(VkFormat format, uint32_t width, uint32_t height,
+                                      uint32_t layers, VkImageUsageFlags usage) {
+  VkImageCreateInfo info = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
+  info.imageType = VK_IMAGE_TYPE_2D;
+  info.flags = 0;
+  info.format = format;
+  info.extent.width = width;
+  info.extent.height = height;
+  info.extent.depth = 1;  // Must be 1 for 2d images
+  info.mipLevels = 1;
+  info.arrayLayers = layers;
+  info.samples = VK_SAMPLE_COUNT_1_BIT;
+  info.tiling = VK_IMAGE_TILING_LINEAR;
+  info.usage = usage;
+  info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+  info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+  return info;
+}
+
+VulkanImage::VulkanImage(const VulkanDevice& device, VkFormat format, uint32_t width,
+                         uint32_t height, uint32_t layers, VkImageUsageFlags usage,
+                         uint32_t mem_type_index, std::optional<std::string> mem_scope,
+                         std::shared_ptr back_memory)
+    : VulkanResource(device, mem_scope, back_memory), width(width), height(height), layers(layers) {
+  // Create an image
+  VkImageCreateInfo image_info = MakeImageCreateInfo(format, width, height, layers, usage);
+  VULKAN_CALL(vkCreateImage(device, &image_info, nullptr, &image));
+
+  VkMemoryRequirements mem_reqs;
+  vkGetImageMemoryRequirements(device, image, &mem_reqs);
+
+  // Allocate new memory if none was passed in
+  if (!memory) {
+    AllocateMemory(mem_reqs, mem_type_index);
+  }
+  // Bind the image to the allocated memory
+  VULKAN_CALL(vkBindImageMemory(device, image, memory->memory_, 0));
+}
+
+void VulkanImage::AllocateMemory(const VkMemoryRequirements& mem_reqs, uint32_t mem_type_index) {
+  VkMemoryAllocateInfo mem_info = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
+  mem_info.allocationSize = mem_reqs.size;
+  mem_info.memoryTypeIndex = mem_type_index;
+
+  // Allocate memory
+  VkDeviceMemory raw_memory;
+  VULKAN_CALL(vkAllocateMemory(device_, &mem_info, nullptr, &raw_memory));
+
+  // Store the allocated memory along with its requirements
+  memory = std::make_shared(raw_memory, mem_reqs);
+}
+
+VulkanImage::~VulkanImage() {
+  if (imageView) {
+    vkDestroyImageView(device_, imageView, nullptr);
+  }
+  if (image) {
+    vkDestroyImage(device_, image, nullptr);
+  }
+}
+
+VulkanImage::VulkanImage(VulkanImage&& other)
+    : VulkanResource(std::move(other)),
+      image(other.image),
+      imageView(other.imageView),
+      width(other.width),
+      height(other.height),
+      layers(other.layers) {
+  other.image = VK_NULL_HANDLE;
+  other.imageView = VK_NULL_HANDLE;
+}
+
+VulkanImage& VulkanImage::operator=(VulkanImage&& other) {
+  std::swap(device_, other.device_);
+  std::swap(image, other.image);
+  std::swap(memory, other.memory);
+  std::swap(imageView, other.imageView);
+  std::swap(width, other.width);
+  std::swap(height, other.height);
+  std::swap(layers, other.layers);
+  return *this;
+}
+
+void
VulkanImage::CreateImageView(VkFormat format) { + VkImageViewCreateInfo view_info = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; + view_info.image = image; + view_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + view_info.format = format; + view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + view_info.subresourceRange.baseMipLevel = 0; + view_info.subresourceRange.levelCount = 1; + view_info.subresourceRange.baseArrayLayer = 0; + view_info.subresourceRange.layerCount = layers; + + VULKAN_CALL(vkCreateImageView(device_, &view_info, nullptr, &imageView)); +} + +bool VulkanImage::UseDedicatedAllocation(const VulkanDevice& device, VkImage image, + VkDeviceSize* nbytes) { + if (device.get_image_memory_requirements_2_functions) { + // Which image to request information about + VkImageMemoryRequirementsInfo2KHR req_info2 = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR}; + req_info2.image = image; + + // What information to request + VkMemoryDedicatedRequirementsKHR dedicated_req; + dedicated_req.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR; + dedicated_req.pNext = nullptr; + + VkMemoryRequirements2KHR req2 = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR}; + req2.pNext = &dedicated_req; + + device.get_image_memory_requirements_2_functions->vkGetImageMemoryRequirements2KHR( + device, &req_info2, &req2); + if (dedicated_req.requiresDedicatedAllocation || dedicated_req.prefersDedicatedAllocation) { + *nbytes = req2.memoryRequirements.size; + return true; + } + } + return false; +} + +} // namespace vulkan +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/vulkan/vulkan_image.h b/src/runtime/vulkan/vulkan_image.h new file mode 100644 index 000000000000..ddea19184b55 --- /dev/null +++ b/src/runtime/vulkan/vulkan_image.h @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TVM_RUNTIME_VULKAN_VULKAN_IMAGE_H_ +#define TVM_RUNTIME_VULKAN_VULKAN_IMAGE_H_ + +#include + +#include +#include +#include +#include +#include + +#include "vulkan_resource.h" + +namespace tvm { +namespace runtime { +namespace vulkan { + +class VulkanImage : public VulkanResource { + public: + /* \brief Allocate and create an image on the device + * + * \param device Which device should have the image allocation. + * The VulkanDevice given should outlive the VulkanImage. + * + * \param format The format of the image (e.g., VK_FORMAT_R32_SFLOAT) + * + * \param width The width of the image + * + * \param height The height of the image + * + * \param layers The array layers of the image + * + * \param usage The usage flags for the image (e.g. sampled, transfer destination, etc.) + * + * \param mem_type_index The memory type to index. 
This should be + * an index to a compatible memory located in + * VkPhysicalDeviceMemoryProperties. + */ + VulkanImage(const VulkanDevice& device, VkFormat format, uint32_t width, uint32_t height, + uint32_t depth, VkImageUsageFlags usage, uint32_t mem_type_index, + std::optional mem_scope = std::nullopt, + std::shared_ptr back_memory = nullptr); + + ~VulkanImage(); + + // Forbid copy assignment/constructor + VulkanImage(const VulkanImage&) = delete; + VulkanImage& operator=(const VulkanImage&) = delete; + + // Allow move assignment/constructor + VulkanImage(VulkanImage&&); + VulkanImage& operator=(VulkanImage&&); + + void AllocateMemory(const VkMemoryRequirements& mem_reqs, uint32_t mem_type_index); + + void CreateImageView(VkFormat format); + + private: + /*! + * \brief Whether this image should be allocated using dedicated allocation + * + * In typical usage, there will be one VkDeviceMemory that has a + * large number of VkImages pointing to it. Currently, the TVM + * Vulkan runtime has a single VkImage for each VkDeviceMemory. In + * this case, there can be performance benefits by explicitly + * marking this as a dedicated allocation. The function returns + * true if the device supports the dedicated allocation extension, + * and the image either requires or has better performance with a + * dedicated allocation. + * + * \param[out] nbytes If using dedicated allocation, the number of + * bytes required for the allocation. If not using dedicated + * allocation, this value is unchanged. + * + * \returns Whether the allocation should use the dedicated + * allocation extension. + */ + static bool UseDedicatedAllocation(const VulkanDevice& device, VkImage image, + VkDeviceSize* nbytes); + + public: + /*! \brief Pointer to the device that owns this image. + * + * Assumes that the VulkanImage will be destructed before the + * VulkanDevice, and this will never be a dangling reference. + * Stores a VkDevice and not a VulkanDevice, because the + * VulkanDevice may be moved to a different location while the + * VulkanImage is alive. + */ + + //! \brief Handle to the logical image on the device + VkImage image{VK_NULL_HANDLE}; + + //! \brief Handle to the image view + VkImageView imageView{VK_NULL_HANDLE}; + + // capture the memory requirements. + // VkMemoryRequirements mem_reqs; + + // Add width and height members + uint32_t width{0}; // Width of the image + uint32_t height{0}; // Height of the image + uint32_t layers{0}; // Depth of the image +}; + +} // namespace vulkan +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_VULKAN_VULKAN_IMAGE_H_ diff --git a/src/runtime/vulkan/vulkan_resource.cc b/src/runtime/vulkan/vulkan_resource.cc new file mode 100644 index 000000000000..ccc7f78e4845 --- /dev/null +++ b/src/runtime/vulkan/vulkan_resource.cc @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "vulkan_resource.h" + +#include + +#include +#include + +#include "vulkan_device_api.h" + +namespace tvm { +namespace runtime { +namespace vulkan { + +VulkanResource::VulkanResource(const VulkanDevice& device, std::optional<std::string> mem_scope, + std::shared_ptr<VulkanMemory> back_memory) + : device_(device), layout(MemoryLayoutFromScope(mem_scope)), memory(back_memory) {} + +VulkanResource::~VulkanResource() {} + +VulkanResource::VulkanResource(VulkanResource&& other) + : device_(other.device_), layout(other.layout), memory(std::move(other.memory)) { + other.device_ = VK_NULL_HANDLE; +} + +VulkanResource& VulkanResource::operator=(VulkanResource&& other) { + if (this != &other) { + device_ = other.device_; + layout = other.layout; + memory = std::move(other.memory); + other.device_ = VK_NULL_HANDLE; + } + return *this; +} + +VulkanResource::MemoryLayout VulkanResource::MemoryLayoutFromScope( + std::optional<std::string> mem_scope) { + if (!mem_scope) { + return MemoryLayout::kBuffer1D; + } else if (*mem_scope == "global") { + return MemoryLayout::kBuffer1D; + } else if (*mem_scope == "global.texture") { + return MemoryLayout::kImage2DActivation; + } else if (*mem_scope == "global.texture-weight") { + return MemoryLayout::kImage2DWeight; + } else if (*mem_scope == "global.texture-nhwc") { + return MemoryLayout::kImage2DNHWC; + } + throw std::runtime_error("No memory layout defined for memory of scope: " + *mem_scope); +} + +std::string VulkanResource::ScopeFromMemoryLayout(MemoryLayout layout) { + switch (layout) { + case MemoryLayout::kBuffer1D: + return "global"; + case MemoryLayout::kImage2DActivation: + return "global.texture"; + case MemoryLayout::kImage2DWeight: + return "global.texture-weight"; + case MemoryLayout::kImage2DNHWC: + return "global.texture-nhwc"; + default: + throw std::runtime_error("No scope corresponding to the provided memory layout: " + + std::to_string(static_cast<int>(layout))); + } +} + +} // namespace vulkan +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/vulkan/vulkan_resource.h b/src/runtime/vulkan/vulkan_resource.h new file mode 100644 index 000000000000..3c19f432c618 --- /dev/null +++ b/src/runtime/vulkan/vulkan_resource.h @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TVM_RUNTIME_VULKAN_VULKAN_RESOURCE_H_ +#define TVM_RUNTIME_VULKAN_VULKAN_RESOURCE_H_ + +#include + +#include +#include +#include + +namespace tvm { +namespace runtime { +namespace vulkan { + +class VulkanDevice; + +/*! + * \brief Class representing Vulkan device memory allocations. + * + * This class encapsulates a Vulkan device memory allocation and its memory requirements.
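 + * + * A construction sketch (vk_device, vk_buffer, and raw_device_memory are + * illustrative names, not part of this header): + * + * \code + *   VkMemoryRequirements reqs; + *   vkGetBufferMemoryRequirements(vk_device, vk_buffer, &reqs); + *   auto mem = std::make_shared<VulkanMemory>(raw_device_memory, reqs); + * \endcode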
+ * + * It keeps the requirements alongside the handle so that one backing allocation + * can be shared between resources (see the back_memory constructor parameters below). + */ +class VulkanMemory { + public: + /*! + * \brief Constructor to create a VulkanMemory instance. + * + * \param mem The Vulkan device memory handle. + * \param mem_reqs The memory requirements associated with this allocation. + */ + VulkanMemory(VkDeviceMemory mem, const VkMemoryRequirements& mem_reqs) + : memory_(mem), mem_reqs_(mem_reqs) {} + + /*! + * \brief Destructor. It only resets the handle; the VkDeviceMemory itself must be + * released by its allocator, since no VkDevice is stored here to free it with. + */ + ~VulkanMemory() { + if (memory_ != VK_NULL_HANDLE) { + memory_ = VK_NULL_HANDLE; + } + } + + VkDeviceMemory memory_; + VkMemoryRequirements mem_reqs_; +}; + +/*! + * \brief Base class for Vulkan resources such as buffers and images. + * + * This class holds common properties and functionalities for Vulkan resources, + * including device association, memory layout, and memory management. + */ +class VulkanResource { + public: + /*! + * \brief Enumeration of memory layout types. + */ + enum class MemoryLayout { + kBuffer1D, + kImage2DActivation, + kImage2DWeight, + kImage2DNHWC, + }; + + /*! + * \brief Constructor to create a VulkanResource. + * + * \param device The Vulkan device associated with this resource. + * \param mem_scope Optional memory scope string specifying the memory layout. + * \param back_memory Optional shared pointer to existing VulkanMemory. + */ + VulkanResource(const VulkanDevice& device, std::optional<std::string> mem_scope, + std::shared_ptr<VulkanMemory> back_memory = nullptr); + + /*! + * \brief Virtual destructor. + */ + virtual ~VulkanResource(); + + // Forbid copy assignment/constructor + VulkanResource(const VulkanResource&) = delete; + VulkanResource& operator=(const VulkanResource&) = delete; + + // Allow move assignment/constructor + VulkanResource(VulkanResource&& other); + VulkanResource& operator=(VulkanResource&& other); + + /*! + * \brief Converts a memory scope string to a MemoryLayout enumeration. + * + * \param mem_scope The optional memory scope string. + * \return The corresponding MemoryLayout value. + */ + static MemoryLayout MemoryLayoutFromScope(std::optional<std::string> mem_scope); + + /*! + * \brief Converts a MemoryLayout enumeration to a memory scope string. + * + * \param layout The MemoryLayout value. + * \return The corresponding memory scope string. + */ + static std::string ScopeFromMemoryLayout(MemoryLayout layout); + + VkDevice device_{VK_NULL_HANDLE}; + MemoryLayout layout{MemoryLayout::kBuffer1D}; + std::shared_ptr<VulkanMemory> memory{nullptr}; +}; + +} // namespace vulkan +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_VULKAN_VULKAN_RESOURCE_H_ diff --git a/src/runtime/vulkan/vulkan_stream.h b/src/runtime/vulkan/vulkan_stream.h index 742a66f15dd4..89d79d8a09aa 100644 --- a/src/runtime/vulkan/vulkan_stream.h +++ b/src/runtime/vulkan/vulkan_stream.h @@ -117,6 +117,8 @@ class VulkanStream { // Synchronize the current stream `state_` with respect to the host. void Synchronize(); + VkCommandPool CommandPool() const { return cmd_pool_; } + private: const VulkanDevice* device_; std::unique_ptr<VulkanStreamState> state_; diff --git a/src/runtime/vulkan/vulkan_timer.cc b/src/runtime/vulkan/vulkan_timer.cc new file mode 100644 index 000000000000..87758f88e722 --- /dev/null +++ b/src/runtime/vulkan/vulkan_timer.cc @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "vulkan_timer.h" + +#include + +#include "vulkan_device_api.h" + +namespace tvm { +namespace runtime { +namespace vulkan { + +VulkanTimerNode::VulkanTimerNode(Device dev) : dev_(dev) { + // Get the Vulkan device and stream + auto& vk_dev = VulkanDeviceAPI::Global()->device(dev_.device_id); + stream_ = &vk_dev.ThreadLocalStream(); + device_ = vk_dev; + + // Retrieve the timestamp period from device properties + timestamp_period_ = vk_dev.device_properties.timestamp_period; + + CreateQueryPool(); +} + +VulkanTimerNode::~VulkanTimerNode() { Cleanup(); } + +void VulkanTimerNode::CreateQueryPool() { + VkQueryPoolCreateInfo query_pool_info{}; + query_pool_info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + query_pool_info.queryType = VK_QUERY_TYPE_TIMESTAMP; + query_pool_info.queryCount = 2; + + VkResult res = vkCreateQueryPool(device_, &query_pool_info, nullptr, &query_pool_); + ICHECK(res == VK_SUCCESS) << "Failed to create Vulkan query pool."; +} + +void VulkanTimerNode::Start() { + stream_->Launch([this](VulkanStreamState* state) { + // Reset the query pool before writing timestamps + vkCmdResetQueryPool(state->cmd_buffer_, query_pool_, start_query_, 2); + vkCmdWriteTimestamp(state->cmd_buffer_, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, query_pool_, + start_query_); + }); +} + +void VulkanTimerNode::Stop() { + stream_->Launch([this](VulkanStreamState* state) { + vkCmdWriteTimestamp(state->cmd_buffer_, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, query_pool_, + end_query_); + }); + + // Ensure GPU has finished writing timestamps before collecting them + stream_->Synchronize(); + CollectTimestamps(); +} + +int64_t VulkanTimerNode::SyncAndGetElapsedNanos() { return duration_; } + +void VulkanTimerNode::CollectTimestamps() { + uint64_t timestamps[2] = {0}; + + VkResult result = + vkGetQueryPoolResults(device_, query_pool_, 0, 2, sizeof(timestamps), timestamps, + sizeof(uint64_t), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + + ICHECK(result == VK_SUCCESS) << "Failed to get Vulkan query pool results."; + + // Calculate the duration in nanoseconds + uint64_t diff = timestamps[1] - timestamps[0]; + duration_ = static_cast(diff * timestamp_period_); +} + +void VulkanTimerNode::Cleanup() { + if (query_pool_ != VK_NULL_HANDLE) { + vkDestroyQueryPool(device_, query_pool_, nullptr); + query_pool_ = VK_NULL_HANDLE; + } +} + +} // namespace vulkan +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/vulkan/vulkan_timer.h b/src/runtime/vulkan/vulkan_timer.h new file mode 100644 index 000000000000..7636fcd812bf --- /dev/null +++ b/src/runtime/vulkan/vulkan_timer.h @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TVM_RUNTIME_VULKAN_VULKAN_TIMER_H_ +#define TVM_RUNTIME_VULKAN_VULKAN_TIMER_H_ + +#include +#include + +#include "vulkan_device.h" +#include "vulkan_stream.h" + +namespace tvm { +namespace runtime { +namespace vulkan { + +class VulkanDevice; + +/*! + * \brief Timer node for measuring GPU execution time using Vulkan. + * + * This class uses Vulkan timestamp queries to measure the time taken + * by GPU operations between `Start()` and `Stop()` calls. + */ +class VulkanTimerNode : public TimerNode { + public: + /*! + * \brief Constructs a VulkanTimerNode for the specified device. + * \param dev The TVM device to be used for timing. + */ + explicit VulkanTimerNode(Device dev); + + /*! + * \brief Destructor to clean up Vulkan resources. + */ + ~VulkanTimerNode() override; + + /*! + * \brief Starts the timer by recording a timestamp. + */ + void Start() override; + + /*! + * \brief Stops the timer by recording another timestamp. + */ + void Stop() override; + + /*! + * \brief Retrieves the elapsed time in nanoseconds. + * \return The elapsed time in nanoseconds between Start and Stop. + */ + int64_t SyncAndGetElapsedNanos() override; + + TVM_FFI_DECLARE_OBJECT_INFO_FINAL("runtime.vulkan.VulkanTimerNode", VulkanTimerNode, TimerNode); + + private: + Device dev_; ///< The TVM device being used. + VkDevice device_{VK_NULL_HANDLE}; ///< The Vulkan device handle. + VulkanStream* stream_{nullptr}; ///< The Vulkan stream for command buffer management. + VkQueryPool query_pool_{VK_NULL_HANDLE}; ///< The Vulkan query pool for timestamp queries. + float timestamp_period_; ///< The period (in nanoseconds) for each timestamp tick. + uint32_t start_query_ = 0; ///< The index for the start timestamp query. + uint32_t end_query_ = 1; ///< The index for the end timestamp query. + int64_t duration_ = 0; ///< The measured duration in nanoseconds. + + /*! + * \brief Creates a Vulkan query pool for timestamp queries. + */ + void CreateQueryPool(); + + /*! + * \brief Collects timestamps and calculates the duration. + */ + void CollectTimestamps(); + + /*! + * \brief Cleans up the Vulkan query pool. 
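 + * + * Safe to call more than once: the pool handle is reset to VK_NULL_HANDLE + * after the pool is destroyed, so a repeated call is a no-op. 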
*/ + void Cleanup(); +}; + +} // namespace vulkan +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_VULKAN_VULKAN_TIMER_H_ diff --git a/src/runtime/vulkan/vulkan_wrapped_func.cc b/src/runtime/vulkan/vulkan_wrapped_func.cc index 007d6abdbadb..8d78aa7da85a 100644 --- a/src/runtime/vulkan/vulkan_wrapped_func.cc +++ b/src/runtime/vulkan/vulkan_wrapped_func.cc @@ -25,6 +25,7 @@ #include "../file_utils.h" #include "vulkan_device_api.h" +#include "vulkan_resource.h" namespace tvm { namespace runtime { @@ -45,6 +46,7 @@ void VulkanWrappedFunc::Init(VulkanModuleNode* m, ObjectPtr<Object> sptr, void VulkanWrappedFunc::operator()(ffi::PackedArgs args, ffi::Any* rv, const ArgUnion64* pack_args) const { int device_id = VulkanDeviceAPI::Global()->GetActiveDeviceID(); + const auto total_function_args = num_buffer_args_; // re-walked below when ordering descriptors auto& device = VulkanDeviceAPI::Global()->device(device_id); if (!scache_[device_id]) { scache_[device_id] = m_->GetPipeline(device_id, func_name_, num_pack_args_); @@ -52,15 +54,37 @@ void VulkanWrappedFunc::operator()(ffi::PackedArgs args, ffi::Any* rv, const auto& pipeline = scache_[device_id]; ThreadWorkLoad wl = launch_param_config_.Extract(args); std::vector<VkDescriptorBufferInfo> descriptor_buffers; - descriptor_buffers.resize(num_buffer_args_); + std::vector<VkDescriptorImageInfo> descriptor_images; + + descriptor_buffers.reserve(num_buffer_args_); + descriptor_images.reserve(num_buffer_args_); + for (size_t i = 0; i < num_buffer_args_; ++i) { - void* buf = args[static_cast<int>(i)].cast<void*>(); - VkDescriptorBufferInfo binfo; - binfo.buffer = static_cast<VulkanBuffer*>(buf)->buffer; - binfo.offset = 0; - binfo.range = VK_WHOLE_SIZE; - descriptor_buffers[i] = binfo; + void* res_ = args[static_cast<int>(i)].cast<void*>(); + VulkanResource* res = static_cast<VulkanResource*>(res_); + + if (auto* buffer = dynamic_cast<VulkanBuffer*>(res)) { + VkDescriptorBufferInfo binfo; + binfo.buffer = buffer->buffer; + binfo.offset = 0; + binfo.range = VK_WHOLE_SIZE; + descriptor_buffers.push_back(binfo); + } else if (auto* image = dynamic_cast<VulkanImage*>(res)) { + VkDescriptorImageInfo iinfo; + iinfo.imageView = image->imageView; + iinfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + descriptor_images.push_back(iinfo); + } + } + + // Check that the total number of descriptors matches num_buffer_args_ + if (descriptor_buffers.size() + descriptor_images.size() != num_buffer_args_) { 
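+ // Reached when an opaque-handle argument is neither a VulkanBuffer nor a + // VulkanImage, so the dynamic_cast dispatch above recorded no descriptor for it. 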
+ throw std::runtime_error("Mismatch in the number of function arguments and descriptor sets."); } + const size_t nbytes_scalars = num_pack_args_ * sizeof(ArgUnion64); if (pipeline->use_ubo) { auto& ubo = device.ThreadLocalUniformBuffer(nbytes_scalars); @@ -71,13 +95,39 @@ void VulkanWrappedFunc::operator()(ffi::PackedArgs args, ffi::Any* rv, descriptor_buffers.push_back(binfo); } if (device.UseImmediate()) { + std::vector descriptor_data; + descriptor_data.resize(descriptor_buffers.size() * sizeof(VkDescriptorBufferInfo) + + descriptor_images.size() * sizeof(VkDescriptorImageInfo)); + + size_t offset = 0; + size_t buffer_idx = 0, image_idx = 0; + + for (size_t i = 0; i < total_function_args; ++i) { + void* res_ = args[static_cast(i)].cast(); + VulkanResource* res = static_cast(res_); + if (dynamic_cast(res)) { + std::memcpy(descriptor_data.data() + offset, &descriptor_buffers[buffer_idx++], + sizeof(VkDescriptorBufferInfo)); + offset += sizeof(VkDescriptorBufferInfo); + } else if (dynamic_cast(res)) { + std::memcpy(descriptor_data.data() + offset, &descriptor_images[image_idx++], + sizeof(VkDescriptorImageInfo)); + offset += sizeof(VkDescriptorImageInfo); + } + } + + if (pipeline->use_ubo) { + std::memcpy(descriptor_data.data() + offset, &descriptor_buffers[buffer_idx++], + sizeof(VkDescriptorBufferInfo)); + offset += sizeof(VkDescriptorBufferInfo); + } // Can safely capture by reference as this lambda is immediately executed on the calling thread. device.ThreadLocalStream().Launch([&](VulkanStreamState* state) { vkCmdBindPipeline(state->cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline); ICHECK(pipeline->descriptor_update_template != VK_NULL_HANDLE); device.descriptor_template_khr_functions->vkCmdPushDescriptorSetWithTemplateKHR( state->cmd_buffer_, pipeline->descriptor_update_template, pipeline->pipeline_layout, 0, - descriptor_buffers.data()); + descriptor_data.data()); if (pipeline->use_ubo) { auto& ubo = device.ThreadLocalUniformBuffer(nbytes_scalars); @@ -113,29 +163,38 @@ void VulkanWrappedFunc::operator()(ffi::PackedArgs args, ffi::Any* rv, // Otherwise, the more expensive deferred path. 
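 // Without the push-descriptor extension, the descriptor set is written once // through vkUpdateDescriptorSets (deferred_initializer) and the dispatch is // recorded separately (deferred_kernel); both are then scheduled on the stream. 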
std::vector<ArgUnion64> pack_args_storage(pack_args, pack_args + num_pack_args_); - const auto& deferred_initializer = [&device, pipeline, descriptor_buffers]() { + const auto& deferred_initializer = [&device, pipeline, descriptor_buffers, descriptor_images, + args, total_function_args]() { std::vector<VkWriteDescriptorSet> write_descriptor_sets; - write_descriptor_sets.resize(descriptor_buffers.size()); - for (size_t i = 0; i < write_descriptor_sets.size(); i++) { - write_descriptor_sets[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - write_descriptor_sets[i].pNext = nullptr; - write_descriptor_sets[i].dstSet = pipeline->descriptor_set; - write_descriptor_sets[i].dstBinding = i; - write_descriptor_sets[i].dstArrayElement = 0; - write_descriptor_sets[i].descriptorCount = 1; - write_descriptor_sets[i].pImageInfo = nullptr; - write_descriptor_sets[i].pBufferInfo = &(descriptor_buffers[i]); - write_descriptor_sets[i].pTexelBufferView = nullptr; - - if (pipeline->use_ubo && i == write_descriptor_sets.size() - 1) { - // The last binding is for UBO - write_descriptor_sets[i].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - } else { - write_descriptor_sets[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + write_descriptor_sets.reserve(descriptor_buffers.size() + descriptor_images.size()); + + size_t buffer_idx = 0, image_idx = 0; + // Iterate over the arguments to determine their bindings + for (size_t i = 0; i < total_function_args; ++i) { + void* res_ = args[static_cast<int>(i)].cast<void*>(); + VulkanResource* res = static_cast<VulkanResource*>(res_); + VkWriteDescriptorSet write_set = {}; + write_set.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_set.pNext = nullptr; + write_set.dstSet = pipeline->descriptor_set; + write_set.dstBinding = i; + write_set.dstArrayElement = 0; + write_set.descriptorCount = 1; + if (dynamic_cast<VulkanBuffer*>(res)) { + write_set.descriptorType = + (buffer_idx == descriptor_buffers.size() - 1 && pipeline->use_ubo) + ? 
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER + : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + write_set.pBufferInfo = &(descriptor_buffers[buffer_idx++]); + write_descriptor_sets.push_back(write_set); + } else if (dynamic_cast<VulkanImage*>(res)) { + write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + write_set.pImageInfo = &(descriptor_images[image_idx++]); + write_descriptor_sets.push_back(write_set); } } vkUpdateDescriptorSets(device, write_descriptor_sets.size(), write_descriptor_sets.data(), 0, nullptr); }; const auto& deferred_kernel = [this, pipeline, wl, pack_args_storage, nbytes_scalars, device_id](VulkanStreamState* state) { @@ -278,8 +337,15 @@ std::shared_ptr<VulkanPipeline> VulkanModuleNode::GetPipeline(size_t device_id, tpl.dstArrayElement = 0; tpl.descriptorCount = 1; tpl.descriptorType = desc_type; - tpl.offset = binding * sizeof(VkDescriptorBufferInfo); - tpl.stride = sizeof(VkDescriptorBufferInfo); + + // Choose the appropriate size for image descriptors + if (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { + tpl.offset = binding * sizeof(VkDescriptorImageInfo); + tpl.stride = sizeof(VkDescriptorImageInfo); + } else { + tpl.offset = binding * sizeof(VkDescriptorBufferInfo); + tpl.stride = sizeof(VkDescriptorBufferInfo); + } arg_template.push_back(tpl); } }; @@ -287,10 +353,17 @@ std::shared_ptr<VulkanPipeline> VulkanModuleNode::GetPipeline(size_t device_id, { auto fit = fmap_.find(func_name); ICHECK(fit != fmap_.end()); - for (DLDataType arg_type : fit->second.arg_types) { - if (arg_type.code == kDLOpaqueHandle) { - push_arg_info(num_buffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); - ++num_buffer; + const auto& info = fit->second; + + for (size_t i = 0; i < info.arg_types.size(); ++i) { + if (info.arg_types[i].code == kDLOpaqueHandle) { + if (runtime::IsTextureStorage(info.storage_scopes[i])) { + push_arg_info(num_buffer, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE); + ++num_buffer; // an image still consumes one binding slot + } else { + push_arg_info(num_buffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + ++num_buffer; + } } else { ++num_pod; } diff --git a/src/support/libinfo.cc b/src/support/libinfo.cc index d0646ee8b06f..93d88bc6fe56 100644 --- a/src/support/libinfo.cc +++ b/src/support/libinfo.cc @@ -78,6 +78,10 @@ #define TVM_INFO_USE_VULKAN "NOT-FOUND" #endif +#ifndef TVM_INFO_USE_VULKAN_GTEST +#define TVM_INFO_USE_VULKAN_GTEST "NOT-FOUND" +#endif + #ifndef TVM_INFO_USE_METAL #define TVM_INFO_USE_METAL "NOT-FOUND" #endif @@ -352,6 +356,7 @@ TVM_DLL ffi::Map GetLibInfo() { {"USE_THRUST", TVM_INFO_USE_THRUST}, {"USE_CURAND", TVM_INFO_USE_CURAND}, {"USE_VULKAN", TVM_INFO_USE_VULKAN}, + {"USE_VULKAN_GTEST", TVM_INFO_USE_VULKAN_GTEST}, {"USE_CLML", TVM_INFO_USE_CLML}, {"TVM_CLML_VERSION", TVM_INFO_USE_TVM_CLML_VERSION}, {"USE_CLML_GRAPH_EXECUTOR", TVM_INFO_USE_CLML_GRAPH_EXECUTOR}, diff --git a/src/target/build_common.h b/src/target/build_common.h index cf1e3344fc3c..190943a313b5 100644 --- a/src/target/build_common.h +++ b/src/target/build_common.h @@ -59,6 +59,13 @@ inline std::unordered_map<std::string, runtime::FunctionInfo> ExtractFuncInfo(co 
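+ // The per-parameter storage scopes recorded below are what + // VulkanModuleNode::GetPipeline consults (via runtime::IsTextureStorage) when + // deciding between storage-image and storage-buffer bindings above. info.arg_extra_tags.push_back(is_tensormap(f->params[i]) ? 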
runtime::FunctionInfo::ArgExtraTags::kTensorMap : runtime::FunctionInfo::ArgExtraTags::kNone); + + // Get the storage scope from the type annotation if available + if (auto* ptr = f->params[i]->type_annotation.as()) { + info.storage_scopes.push_back(std::string(ptr->storage_scope)); + } else { + info.storage_scopes.push_back(""); + } } if (auto opt = f->GetAttr>(tir::attr::kKernelLaunchParams)) { for (const auto& tag : opt.value()) { diff --git a/src/target/spirv/build_vulkan.cc b/src/target/spirv/build_vulkan.cc index f71b7ef8d6fa..35f9e333c14f 100644 --- a/src/target/spirv/build_vulkan.cc +++ b/src/target/spirv/build_vulkan.cc @@ -24,6 +24,8 @@ #include +#include + #include "../../runtime/spirv/spirv_shader.h" #include "../../runtime/vulkan/vulkan_module.h" #include "../build_common.h" @@ -43,5 +45,21 @@ TVM_FFI_STATIC_INIT_BLOCK() { [](IRModule mod, Target target) { return BuildSPIRV(mod, target); }); } +ffi::String VulkanDeviceScopeCompatibilityFromTarget(Target target, ffi::String memory_scope) { + auto prototype_keys = target->GetKeys(); + bool is_adreno = + std::find(prototype_keys.begin(), prototype_keys.end(), "adreno") != prototype_keys.end(); + if (is_adreno) { + return ffi::String("global"); + } + return memory_scope; +} + +TVM_FFI_STATIC_INIT_BLOCK() { + namespace refl = tvm::ffi::reflection; + refl::GlobalDef().def("DeviceScopeCompatibility.vulkan", + VulkanDeviceScopeCompatibilityFromTarget); +} + } // namespace codegen } // namespace tvm diff --git a/src/target/spirv/codegen_spirv.cc b/src/target/spirv/codegen_spirv.cc index 136f969896f5..e201172cfb21 100644 --- a/src/target/spirv/codegen_spirv.cc +++ b/src/target/spirv/codegen_spirv.cc @@ -30,6 +30,7 @@ #include #include "../../runtime/pack_args.h" +#include "../../runtime/texture.h" #include "../../runtime/vulkan/vulkan_common.h" #include "../../tir/transforms/ir_utils.h" @@ -42,7 +43,9 @@ runtime::SPIRVShader CodeGenSPIRV::BuildFunction(const PrimFunc& f, const std::s this->InitFuncState(); ICHECK(f->HasNonzeroAttr(tir::attr::kNoAlias)) << "SPIRV only takes restricted memory model"; std::vector pod_args; - uint32_t i_buffer = 0; + + // binding for images and buffers + uint32_t binding_index = 0; // Currently, all storage and uniform buffer arguments are passed as // a single descriptor set at index 0. If ever non-zero, must @@ -66,8 +69,16 @@ runtime::SPIRVShader CodeGenSPIRV::BuildFunction(const PrimFunc& f, const std::s // The loaded byte is cast to bool inside the LoadNode visitor below. 
value_storage_type = boolean_storage_type_.with_lanes(value_storage_type.lanes()); } - spirv::Value arg_value = builder_->BufferArgument(builder_->GetSType(value_storage_type), - descriptor_set, i_buffer++); + + spirv::Value arg_value; + if (ptr && runtime::IsTextureStorage(std::string(ptr->storage_scope))) { + arg_value = builder_->StorageImageArgument(arg->name_hint, value_storage_type, 2, 2, + descriptor_set, binding_index++); + } else { + arg_value = builder_->BufferArgument(builder_->GetSType(value_storage_type), descriptor_set, + binding_index++); + } + builder_->SetName(arg_value, arg->name_hint); storage_info_[arg.get()].SetContentType(value_storage_type, arg->name_hint); var_map_[arg.get()] = arg_value; @@ -77,7 +88,6 @@ } spirv::Value func_ptr = builder_->NewFunction(); builder_->StartFunction(func_ptr); - runtime::SPIRVShader shader; if (pod_args.size() != 0) { @@ -95,7 +105,8 @@ } else { shader.flag |= 1 << runtime::vulkan::ShaderMetaDataFlagMask::kUseUBO; // If we need to pass more arguments than push constants could handle, we use UBO. - spirv::Value ptr = builder_->DeclareUniformBuffer(value_types, descriptor_set, i_buffer++); + spirv::Value ptr = + builder_->DeclareUniformBuffer(value_types, descriptor_set, binding_index++); for (size_t i = 0; i < pod_args.size(); ++i) { spirv::Value value = builder_->GetUniform(ptr, value_types[i], static_cast<uint32_t>(i)); var_map_[pod_args[i].get()] = value; @@ -511,6 +522,71 @@ spirv::Value CodeGenSPIRV::VisitExpr_(const CallNode* op) { return builder_->StructArrayAccess(ptr_type, var_map_[buffer_node], MakeValue(index)); } else if (op->op.same_as(builtin::tvm_thread_invariant())) { return MakeValue(op->args[0]); + } else if (op->op.same_as(builtin::texture2d_store())) { + ICHECK_EQ(op->args.size(), 6U); + + // Extract the image, the three coordinates, and the texel value as SPIR-V values + spirv::Value image = MakeValue(op->args[0]); // image + spirv::Value coord_x = MakeValue(op->args[1]); // x-coordinate + spirv::Value coord_y = MakeValue(op->args[2]); // y-coordinate + spirv::Value layer_index = MakeValue(op->args[3]); // layer_index + spirv::Value texel = MakeValue(op->args.back()); + + // Composite value representing the coordinates (int3) + spirv::Value coord = + builder_->MakeComposite(builder_->GetSType(DataType::Int(32).with_lanes(3)), + {coord_x, coord_y, layer_index}); + + spirv::SType image_type = builder_->QuerySType(op->args[0].as<VarNode>()->name_hint); + spirv::Value loaded_image = builder_->MakeValue(spv::OpLoad, image_type, image); + + // Generate the SPIR-V instruction to store the value in the texture + builder_->MakeInst(spv::OpImageWrite, loaded_image, coord, texel); + return spirv::Value(); + + } else if (op->op.same_as(builtin::texture2d_load())) { + ICHECK_EQ(op->args.size(), 6U); + + // Extract the image and the three coordinates as SPIR-V values + spirv::SType image_type = builder_->QuerySType(op->args[0].as<VarNode>()->name_hint); + spirv::Value image = MakeValue(op->args[0]); // image + spirv::Value coord_x = MakeValue(op->args[1]); // x-coordinate + spirv::Value coord_y = MakeValue(op->args[2]); // y-coordinate + spirv::Value layer_index = MakeValue(op->args[3]); // layer_index + + // Create a composite value representing the coordinates (int3) + spirv::Value coord = + builder_->MakeComposite(builder_->GetSType(DataType::Int(32).with_lanes(3)), + {coord_x, coord_y, layer_index}); + + 
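// OpLoad turns the image variable (a pointer in UniformConstant storage) into + // an image value, and OpImageRead then fetches a full four-lane texel at the + // (x, y, layer) coordinate; a scalar request extracts one component below. + 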
spirv::Value loaded_image = + builder_->MakeValue(spv::OpLoad, image_type, image); + spirv::Value image_texel = builder_->MakeValue( + spv::OpImageRead, builder_->GetSType(op->dtype.with_lanes(4)), loaded_image, coord); + + if (op->args.back().as()) { + return image_texel; + } else { + std::vector components; + // Extract the required component from the vector + spirv::SType element_type = + builder_->GetSType(op->dtype.with_lanes(1)); // Scalar type (float) + spirv::Value index = MakeValue(op->args.back()); // Index to extract + spirv::Value component = + builder_->MakeValue(spv::OpVectorExtractDynamic, element_type, image_texel, index); + + if (op->dtype.lanes() > 1) { + // Create a vector by duplicating the extracted component + for (int i = 0; i < op->dtype.lanes(); i++) { + components.push_back(component); + } + // Combine the components into a single vector + return builder_->Concat(components); + } else { + return component; + } + } + } else { LOG(FATAL) << "Unresolved call " << op->op; } @@ -676,13 +752,12 @@ void CodeGenSPIRV::VisitStmt_(const ForNode* op) { spirv::Value init_value = MakeValue(op->min); PrimExpr end = is_zero(op->min) ? op->extent : analyzer_->Simplify(op->min + op->extent); spirv::Value end_value = MakeValue(end); - spirv::PhiValue loop_var = builder_->MakePhi(init_value.stype, 2); // loop step spirv::Value step; if (op->HasTrivialStep()) { - step = op->loop_var.dtype().is_int() ? builder_->IntImm(loop_var.stype, 1) - : builder_->UIntImm(loop_var.stype, 1); + step = op->loop_var.dtype().is_int() ? builder_->IntImm(init_value.stype, 1) + : builder_->UIntImm(init_value.stype, 1); } else { step = MakeValue(tvm::cast(end->dtype, *op->step)); } @@ -701,6 +776,7 @@ void CodeGenSPIRV::VisitStmt_(const ForNode* op) { // Loop head builder_->StartLabel(head_label); + spirv::PhiValue loop_var = builder_->MakePhi(init_value.stype, 2); loop_var.SetIncoming(0, init_value, init_label); spirv::Value loop_cond = builder_->LT(loop_var, end_value); uint32_t control = diff --git a/src/target/spirv/ir_builder.cc b/src/target/spirv/ir_builder.cc index bac66a3aacf7..951c799e0cee 100644 --- a/src/target/spirv/ir_builder.cc +++ b/src/target/spirv/ir_builder.cc @@ -534,6 +534,136 @@ SType IRBuilder::DeclareType(const DataType& dtype, uint32_t row, uint32_t col) } } +void IRBuilder::RegisterSType(const std::string& name, SType var_stype) { + auto it = stype_name_tbl_.find(name); + if (it != stype_name_tbl_.end()) { + LOG(FATAL) << name << " already exists."; + return; + } + stype_name_tbl_[name] = var_stype; +} + +SType IRBuilder::QuerySType(const std::string& name) { + auto it = stype_name_tbl_.find(name); + if (it != stype_name_tbl_.end()) { + return it->second; + } + LOG(FATAL) << "Value \"" << name << "\" does not yet exist."; + return SType(); +} + +bool IRBuilder::CheckSTypeExistence(const std::string& name) { + return stype_name_tbl_.find(name) != stype_name_tbl_.end(); +} + +spv::ImageFormat IRBuilder::GetImageFormat(const DataType& dtype, int channels) { + // Handle float formats + if (dtype.is_float()) { + switch (dtype.bits()) { + case 32: + if (channels == 1) return spv::ImageFormatR32f; + if (channels == 2) return spv::ImageFormatRg32f; + if (channels == 4) return spv::ImageFormatRgba32f; + break; + case 16: + if (channels == 1) return spv::ImageFormatR16f; + if (channels == 2) return spv::ImageFormatRg16f; + if (channels == 4) return spv::ImageFormatRgba16f; + break; + default: + return spv::ImageFormatUnknown; + } + } else if (dtype.is_int()) { + switch (dtype.bits()) { + 
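// Unsigned integer dtypes have no cases here yet, so they fall through to + // spv::ImageFormatUnknown below. As a worked example, DataType::Int(32) with + // four channels maps to spv::ImageFormatRgba32i, the SPIR-V counterpart of + // the VK_FORMAT_R32G32B32A32_SINT chosen by the runtime's DTypeToVulkanFormat. + 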
case 32: + if (channels == 1) return spv::ImageFormatR32i; + if (channels == 2) return spv::ImageFormatRg32i; + if (channels == 4) return spv::ImageFormatRgba32i; + break; + case 16: + if (channels == 1) return spv::ImageFormatR16i; + if (channels == 2) return spv::ImageFormatRg16i; + if (channels == 4) return spv::ImageFormatRgba16i; + break; + case 8: + if (channels == 1) return spv::ImageFormatR8i; + if (channels == 2) return spv::ImageFormatRg8i; + if (channels == 4) return spv::ImageFormatRgba8i; + break; + default: + return spv::ImageFormatUnknown; + } + } + return spv::ImageFormatUnknown; +} + +SType IRBuilder::GetStorageImageSType(const DataType& dtype, int num_dimensions, uint32_t sampled) { + // Get the appropriate SPIR-V ImageFormat using the dtype + spv::ImageFormat spv_format = GetImageFormat(dtype, 4); + + // get SPIR-V type for image + SType value_type = GetSType(dtype); + if (spv_format == spv::ImageFormatUnknown) { + LOG(FATAL) << "Unsupported image format for dtype: " << dtype; + } + + // Create a key to cache and reuse image types + auto key = std::make_tuple(spv_format, num_dimensions, sampled); + auto it = storage_image_ptr_tbl_.find(key); + if (it != storage_image_ptr_tbl_.end()) { + return it->second; + } + + // Determine the SPIR-V dimension based on the number of dimensions + spv::Dim dim; + if (num_dimensions == 1) { + dim = spv::Dim1D; + } else if (num_dimensions == 2) { + dim = spv::Dim2D; + } else if (num_dimensions == 3) { + dim = spv::Dim3D; + } else { + LOG(FATAL) << "Unsupported number of dimensions: " << num_dimensions; + } + + // Generate a unique ID for the new image type + int img_id = id_counter_++; + + // Declare the SPIR-V image type + ib_.Begin(spv::OpTypeImage) + .AddSeq(img_id, value_type, dim, + /*Depth=*/0, /*Arrayed=*/1, /*MS=*/0, /*Sampled=*/sampled, spv_format) + .Commit(&global_); + + // Create and cache the new image type + SType img_t; + img_t.id = img_id; + img_t.element_type_id = value_type.id; + storage_image_ptr_tbl_[key] = img_t; + return img_t; +} + +Value IRBuilder::StorageImageArgument(const std::string& name, const DataType& dtype, + int num_dimensions, uint32_t sampled, uint32_t descriptor_set, + uint32_t binding) { + auto texture_type = GetStorageImageSType(dtype, num_dimensions, sampled); + auto texture_ptr_type = GetPointerType(texture_type, spv::StorageClassUniformConstant); + + // Store the type in the map + RegisterSType(name, texture_type); + Value val = NewValue(texture_ptr_type, kVariablePtr); + + // Variable declaration + ib_.Begin(spv::OpVariable) + .AddSeq(texture_ptr_type, val, spv::StorageClassUniformConstant) + .Commit(&global_); + + // Decorate the image argument + this->Decorate(spv::OpDecorate, val, spv::DecorationDescriptorSet, descriptor_set); + this->Decorate(spv::OpDecorate, val, spv::DecorationBinding, binding); + return val; +} + void IRBuilder::AddCapabilityFor(const DataType& dtype) { // Declare appropriate capabilities for int/float types if (dtype.is_int() || dtype.is_uint()) { diff --git a/src/target/spirv/ir_builder.h b/src/target/spirv/ir_builder.h index 5df779c59547..6c57c6982f65 100644 --- a/src/target/spirv/ir_builder.h +++ b/src/target/spirv/ir_builder.h @@ -63,6 +63,7 @@ enum ValueKind { kVectorPtr, kStructArrayPtr, kPushConstantPtr, + kVariablePtr, kFunction, kExtInst, kUniformPtr, @@ -607,6 +608,45 @@ class IRBuilder { */ Value GetSpecConst(const SType& dtype, uint64_t value); + SType f32_type() const { return t_fp32_; } + + SType i32_type() const { return t_int32_; } + + SType f32_v4_type() 
const { return t_v4_fp32_; } + + // Register name to corresponding Value/VariablePointer + void RegisterSType(const std::string& name, SType var_stype); + // Query Value/VariablePointer by name + SType QuerySType(const std::string& name); + // Check whether a value has been evaluated + bool CheckSTypeExistence(const std::string& name); + + Value MakeComposite(const SType& composite_type, const std::vector& constituents) { + // Create a new SSA value for the composite type + Value composite_value = NewValue(composite_type, kNormal); + + // Begin the OpCompositeConstruct instruction + ib_.Begin(spv::OpCompositeConstruct) + .Add(composite_type) + .Add(composite_value); + + // Add each constituent value + for (const Value& val : constituents) { + ib_.Add(val); + } + + // Commit the instruction to the function segment + ib_.Commit(&function_); + return composite_value; + } + + spv::ImageFormat GetImageFormat(const DataType& dtype, int channels); + + Value StorageImageArgument(const std::string& name, const DataType& dtype, int num_dimensions, + uint32_t sampled, uint32_t descriptor_set, uint32_t binding); + + SType GetStorageImageSType(const DataType& dtype, int num_dimensions, uint32_t sampled); + private: /*! * \brief Create new value @@ -679,7 +719,8 @@ class IRBuilder { /*! \brief glsl 450 extension */ Value ext_glsl450_; /*! \brief Special cache int32, fp32, void*/ - SType t_bool_, t_int32_, t_uint32_, t_fp32_, t_void_, t_void_func_; + SType t_bool_, t_int32_, t_uint32_, t_v2_int_, t_fp16_, t_fp32_, t_v4_fp32_, t_void_, + t_void_func_; /*! \brief quick cache for const one i32 */ Value const_i32_zero_; @@ -723,6 +764,9 @@ class IRBuilder { /*! \brief map from name of a ExtInstImport to its value */ std::map ext_inst_tbl_; + std::map, SType> storage_image_ptr_tbl_; + std::unordered_map stype_name_tbl_; + /*! 
\brief Header segment * * 5 words long, described in "First Words of Physical Layout" diff --git a/src/target/target_kind.cc b/src/target/target_kind.cc index 96e90f17ac79..0e4af4b12b43 100644 --- a/src/target/target_kind.cc +++ b/src/target/target_kind.cc @@ -393,10 +393,10 @@ TVM_REGISTER_TARGET_KIND("vulkan", kDLVulkan) .add_attr_option("supports_int8") .add_attr_option("supports_int16") .add_attr_option("supports_int32", true) - .add_attr_option("supports_int64") + .add_attr_option("supports_int64", true) .add_attr_option("supports_8bit_buffer") .add_attr_option("supports_16bit_buffer") - .add_attr_option("supports_storage_buffer_storage_class") + .add_attr_option("supports_storage_buffer_storage_class", true) .add_attr_option("supports_push_descriptor") .add_attr_option("supports_dedicated_allocation") .add_attr_option("supports_integer_dot_product") @@ -406,6 +406,8 @@ TVM_REGISTER_TARGET_KIND("vulkan", kDLVulkan) .add_attr_option("max_num_threads", 256) .add_attr_option("max_threads_per_block", 256) .add_attr_option("thread_warp_size", 1) + .add_attr_option("texture_spatial_limit", 16384) + .add_attr_option("texture_depth_limit", 2048) .add_attr_option("max_block_size_x") .add_attr_option("max_block_size_y") .add_attr_option("max_block_size_z") @@ -421,6 +423,7 @@ TVM_REGISTER_TARGET_KIND("vulkan", kDLVulkan) .add_attr_option("driver_version") .add_attr_option("vulkan_api_version") .add_attr_option("max_spirv_version") + .add_attr_option("image_base_address_alignment", 64) // Tags .set_default_keys({"vulkan", "gpu"}); diff --git a/tests/cpp-runtime/vulkan/texture_copy_test.cc b/tests/cpp-runtime/vulkan/texture_copy_test.cc new file mode 100644 index 000000000000..0762d73e1672 --- /dev/null +++ b/tests/cpp-runtime/vulkan/texture_copy_test.cc @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "../src/runtime/vulkan/vulkan_device_api.h" + +using tvm::runtime::memory::AllocatorType; +using tvm::runtime::memory::MemoryManager; +using tvm::runtime::memory::Storage; + +class VulkanTextureCopyTest : public ::testing::Test { + protected: + void SetUp() override { + bool enabled = tvm::runtime::RuntimeEnabled("vulkan"); + if (!enabled) { + GTEST_SKIP() << "Skip texture copy test because Vulkan runtime is disabled.\n"; + } + } +}; + +TEST_F(VulkanTextureCopyTest, ViewBufferAsBuffer) { + using namespace tvm; + std::vector shape{1, 16, 16, 8}; + std::vector same_shape{1, 8, 16, 16}; + auto cpu_arr = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0}); + auto cpu_arr_ret = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0}); + + ffi::String mem_scope = "global"; + + DLDevice cl_dev = {kDLVulkan, 0}; + auto allocator = MemoryManager::GetOrCreateAllocator(cl_dev, AllocatorType::kPooled); + auto buffer = allocator->Alloc(cl_dev, ffi::Shape(shape), {kDLFloat, 32, 1}); + auto stor = Storage(buffer, allocator); + + auto vulkan_memobj = stor->AllocTensorScoped(0, ffi::Shape(shape), {kDLFloat, 32, 1}, mem_scope); + auto vulkan_memview = + stor->AllocTensorScoped(0, ffi::Shape(same_shape), {kDLFloat, 32, 1}, mem_scope); + + std::random_device dev; + std::mt19937 mt(dev()); + std::uniform_real_distribution<> random(-10.0, 10.0); + + size_t size = 1; + for (size_t i = 0; i < shape.size(); ++i) { + size *= static_cast(shape[i]); + } + + /* Check original object round trip */ + // Random initialize host pool storage + for (size_t i = 0; i < size; i++) { + static_cast(cpu_arr->data)[i] = random(mt); + } + // Copy to VulkanBuffer + cpu_arr.CopyTo(vulkan_memobj); + // Copy from VulkanBuffer + vulkan_memobj.CopyTo(cpu_arr_ret); + for (size_t i = 0; i < size; i++) { + ICHECK_LT(std::fabs(static_cast(cpu_arr->data)[i] - + static_cast(cpu_arr_ret->data)[i]), + 1e-5); + } + + /* Check view object round trip */ + // Random initialize host pool storage + for (size_t i = 0; i < size; i++) { + static_cast(cpu_arr->data)[i] = random(mt); + } + // Copy to VulkanBuffer + cpu_arr.CopyTo(vulkan_memview); + // Copy from VulkanBuffer + vulkan_memview.CopyTo(cpu_arr_ret); + for (size_t i = 0; i < size; i++) { + ICHECK_LT(std::fabs(static_cast(cpu_arr->data)[i] - + static_cast(cpu_arr_ret->data)[i]), + 1e-5); + } +} + +TEST_F(VulkanTextureCopyTest, ViewBufferAsImage) { + using namespace tvm; + // Shape that doesn't cause padding for image row + std::vector shape{1, 16, 16, 8, 4}; + std::vector same_shape{1, 8, 16, 16, 4}; + auto cpu_arr = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0}); + auto cpu_arr_ret = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0}); + + DLDevice cl_dev = {kDLVulkan, 0}; + auto allocator = MemoryManager::GetOrCreateAllocator(cl_dev, AllocatorType::kPooled); + auto buffer = allocator->Alloc(cl_dev, ffi::Shape(shape), {kDLFloat, 32, 1}); + auto stor = Storage(buffer, allocator); + + auto vulkan_buf_obj = stor->AllocTensorScoped(0, ffi::Shape(shape), {kDLFloat, 32, 1}, "global"); + auto vulkan_img_obj = + stor->AllocTensorScoped(0, ffi::Shape(same_shape), {kDLFloat, 32, 1}, "global.texture"); + + std::random_device dev; + std::mt19937 mt(dev()); + std::uniform_real_distribution<> random(-10.0, 10.0); + + size_t size = 1; + for (size_t i = 0; i < shape.size(); ++i) { + size *= static_cast(shape[i]); + } + + /* Check original object round trip */ + // Random initialize 
host pool storage + for (size_t i = 0; i < size; i++) { + static_cast(cpu_arr->data)[i] = random(mt); + } + // Copy to VulkanBuffer + cpu_arr.CopyTo(vulkan_buf_obj); + // Copy from VulkanBuffer + vulkan_buf_obj.CopyTo(cpu_arr_ret); + for (size_t i = 0; i < size; i++) { + ICHECK_LT(std::fabs(static_cast(cpu_arr->data)[i] - + static_cast(cpu_arr_ret->data)[i]), + 1e-5); + } + + /* Check view object round trip */ + // Random initialize host pool storage + for (size_t i = 0; i < size; i++) { + static_cast(cpu_arr->data)[i] = random(mt); + } + // Copy to VulkanBuffer + cpu_arr.CopyTo(vulkan_img_obj); + // Copy from VulkanBuffer + vulkan_img_obj.CopyTo(cpu_arr_ret); + for (size_t i = 0; i < size; i++) { + ICHECK_LT(std::fabs(static_cast(cpu_arr->data)[i] - + static_cast(cpu_arr_ret->data)[i]), + 1e-5); + } +} diff --git a/tests/cpp-runtime/vulkan/vulkan_timer_test.cc b/tests/cpp-runtime/vulkan/vulkan_timer_test.cc new file mode 100644 index 000000000000..b02f6c9ceec9 --- /dev/null +++ b/tests/cpp-runtime/vulkan/vulkan_timer_test.cc @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "../src/runtime/vulkan/vulkan_timer.h" + +#include +#include + +#include "../src/runtime/vulkan/vulkan_device_api.h" + +using namespace tvm::runtime; +using namespace tvm::runtime::vulkan; + +TEST(VulkanTimerNode, TimerCorrectness) { + VulkanDeviceAPI* api = VulkanDeviceAPI::Global(); + auto device_id = api->GetActiveDeviceID(); + tvm::Device dev{kDLVulkan, device_id}; + + constexpr int32_t kBufferSize = 1024; + Tensor src = Tensor::Empty({kBufferSize}, {kDLInt, 32, 1}, {kDLCPU, 0}); + Tensor dst = Tensor::Empty({kBufferSize}, {kDLInt, 32, 1}, {kDLVulkan, device_id}); + + // Fill CPU array with dummy data + for (int32_t i = 0; i < kBufferSize; ++i) { + static_cast<int32_t*>(src->data)[i] = i; + } + + // Create a Timer + Timer timer = Timer::Start(dev); + + // Perform a CPU -> Vulkan copy + src.CopyTo(dst); + + // Important: Force Vulkan to flush and sync work + api->StreamSync(dev, nullptr); + + timer->Stop(); + int64_t elapsed_nanos = timer->SyncAndGetElapsedNanos(); + + std::cout << "Elapsed time (nanoseconds): " << elapsed_nanos << std::endl; + + // Check that some time was measured + ASSERT_GT(elapsed_nanos, 0); } diff --git a/tests/python/relax/texture/test_network.py b/tests/python/relax/texture/test_network.py index aeb4f1248c10..7321d3827120 100644 --- a/tests/python/relax/texture/test_network.py +++ b/tests/python/relax/texture/test_network.py @@ -37,8 +37,8 @@ import copy -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_network_resnet(target): @I.ir_module class Resnet: diff --git a/tests/python/relax/texture/test_ops.py b/tests/python/relax/texture/test_ops.py index 7011852aa3ab..7c0f523b42ba 100644 --- a/tests/python/relax/texture/test_ops.py +++ b/tests/python/relax/texture/test_ops.py @@ -40,8 +40,8 @@ def main( verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_conv2d_relu(target): @I.ir_module class Input: @@ -58,8 +58,8 @@ def main( verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_relu_conv2d_relu(target): @I.ir_module class Input: @@ -77,8 +77,8 @@ def main( verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_conv2d_relu_tanh(target): @I.ir_module class Input: @@ -96,8 +96,8 @@ def main( verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_conv2d_add(target): @I.ir_module class Input: @@ -116,8 +116,8 @@ def main( verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_conv2d_sum(target): @I.ir_module class Input: @@ -134,8 +134,8 @@ def main( verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_conv2d_sum_keepdims(target): @I.ir_module class 
Input: @@ -152,8 +152,8 @@ def main( verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_conv2d_sum_reduce(target): @I.ir_module class Input: @@ -170,8 +170,8 @@ def main( verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_conv2d_transpose(target): @I.ir_module class Input: @@ -188,8 +188,8 @@ def main( verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_conv2d_expand_dims(target): @I.ir_module class Input: @@ -206,8 +206,8 @@ def main( verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_conv2d_squeeze(target): @I.ir_module class Input: @@ -224,8 +224,8 @@ def main( verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_conv2d_strided_slice(target): @I.ir_module class Input: @@ -244,8 +244,8 @@ def main( verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_conv2d_relu_concat(target): @I.ir_module class Input: @@ -263,8 +263,8 @@ def main( verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_conv2d_relu_concat_split(target): @I.ir_module class Input: @@ -283,8 +283,8 @@ def main(x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "fl verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_conv2d_relu_concat_split_transpose_concat(target): @I.ir_module class Input: @@ -304,8 +304,8 @@ def main(x: R.Tensor((2, 16, 28, 28), "float32"), w: R.Tensor((4, 16, 3, 3), "fl verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_conv2d_maxpool2d(target): @I.ir_module class Input: @@ -329,8 +329,8 @@ def main( verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_conv2d_avgpool2d(target): @I.ir_module class Input: @@ -347,8 +347,8 @@ def main( verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_conv2d_softmax(target): @I.ir_module class Input: @@ -365,8 +365,8 @@ def main( verify(Input, target) -@tvm.testing.requires_opencl -@tvm.testing.parametrize_targets("opencl") +@tvm.testing.requires_opencl_vulkan +@tvm.testing.parametrize_targets("opencl", "vulkan") def test_conv2d_layernorm(target): @I.ir_module class Input: @@ -388,8 +388,8 @@ def main( 
     verify(Input, target)
 
 
-@tvm.testing.requires_opencl
-@tvm.testing.parametrize_targets("opencl")
+@tvm.testing.requires_opencl_vulkan
+@tvm.testing.parametrize_targets("opencl", "vulkan")
 def test_binary_broadcast(target):
     @I.ir_module
     class Input:
@@ -408,8 +408,8 @@ def main(
     verify(Input, target)
 
 
-@tvm.testing.requires_opencl
-@tvm.testing.parametrize_targets("opencl")
+@tvm.testing.requires_opencl_vulkan
+@tvm.testing.parametrize_targets("opencl", "vulkan")
 def test_binary_ewise_scalar(target):
     @I.ir_module
     class Input:
@@ -426,8 +426,8 @@ def main(
     verify(Input, target)
 
 
-@tvm.testing.requires_opencl
-@tvm.testing.parametrize_targets("opencl")
+@tvm.testing.requires_opencl_vulkan
+@tvm.testing.parametrize_targets("opencl", "vulkan")
 def test_residual_block(target):
     """
     - some kind of residual block followed by convolution to have texture after residual block
@@ -512,7 +512,7 @@ def main(
             R.output(gv7)
         return gv7
 
-    verify(Input, "opencl")
+    verify(Input, target)
 
 
 @tvm.testing.requires_opencl
@@ -553,11 +553,11 @@ def main(
            R.output(gv7)
         return gv7
 
-    verify(Input, "opencl")
+    verify(Input, target)
 
 
-@tvm.testing.requires_opencl
-@tvm.testing.parametrize_targets("opencl")
+@tvm.testing.requires_opencl_vulkan
+@tvm.testing.parametrize_targets("opencl", "vulkan")
 def test_pooling_branching_texture_params(target):
     """
     Verification of the pooling and many branches having textures
@@ -610,8 +610,8 @@ def main(
     verify(Input, target)
 
 
-@tvm.testing.requires_opencl
-@tvm.testing.parametrize_targets("opencl")
+@tvm.testing.requires_opencl_vulkan
+@tvm.testing.parametrize_targets("opencl", "vulkan")
 def test_injective_inputs1(target):
     """
     Input
@@ -659,8 +659,8 @@ def main(
     verify(Input, target)
 
 
-@tvm.testing.requires_opencl
-@tvm.testing.parametrize_targets("opencl")
+@tvm.testing.requires_opencl_vulkan
+@tvm.testing.parametrize_targets("opencl", "vulkan")
 def test_injective_nwo_inputs2(target):
     """
     Input
diff --git a/tests/scripts/ci.py b/tests/scripts/ci.py
index 96f0a5f7edaa..9b0c4101437a 100755
--- a/tests/scripts/ci.py
+++ b/tests/scripts/ci.py
@@ -593,6 +593,7 @@ def add_subparser(
         [
             "./tests/scripts/task_java_unittest.sh",
             "./tests/scripts/task_opencl_cpp_unittest.sh {build_dir}",
+            "./tests/scripts/task_vulkan_cpp_unittest.sh {build_dir}",
             "./tests/scripts/task_python_unittest_gpuonly.sh",
             "./tests/scripts/task_python_integration_gpuonly.sh",
         ],
diff --git a/tests/scripts/task_config_build_adreno.sh b/tests/scripts/task_config_build_adreno.sh
index 10fefefbe800..424e11280ed5 100755
--- a/tests/scripts/task_config_build_adreno.sh
+++ b/tests/scripts/task_config_build_adreno.sh
@@ -24,6 +24,8 @@ cd "$BUILD_DIR"
 cp ../cmake/config.cmake .
 
 echo set\(USE_OPENCL_GTEST /googletest\) >> config.cmake
+#echo set\(USE_VULKAN_GTEST /googletest\) >> config.cmake
+
 if [ -f "${ADRENO_OPENCL}/CL/cl_qcom_ml_ops.h" ] ; then
   echo set\(USE_CLML ${ADRENO_OPENCL}\) >> config.cmake
 fi
@@ -31,3 +33,4 @@ echo set\(USE_OPENCL ON\) >> config.cmake
 echo set\(USE_RPC ON\) >> config.cmake
 echo set\(USE_LIBBACKTRACE AUTO\) >> config.cmake
 echo set\(USE_LLVM ON\) >> config.cmake
+#echo set\(USE_VULKAN ON\) >> config.cmake
diff --git a/tests/scripts/task_config_build_gpu.sh b/tests/scripts/task_config_build_gpu.sh
index f306bdf8bf74..a71a2cfedfe4 100755
--- a/tests/scripts/task_config_build_gpu.sh
+++ b/tests/scripts/task_config_build_gpu.sh
@@ -29,6 +29,7 @@ echo set\(USE_CUDA ON\) >> config.cmake
 echo set\(USE_VULKAN ON\) >> config.cmake
 echo set\(USE_OPENCL ON\) >> config.cmake
 echo set\(USE_OPENCL_GTEST \"/googletest\"\) >> config.cmake
+echo set\(USE_VULKAN_GTEST \"/googletest\"\) >> config.cmake
 echo set\(USE_LLVM \"/usr/bin/llvm-config-15 --link-static\"\) >> config.cmake
 echo set\(USE_RPC ON\) >> config.cmake
 echo set\(USE_SORT ON\) >> config.cmake
diff --git a/tests/scripts/task_python_adreno.sh b/tests/scripts/task_python_adreno.sh
index b381fddc2427..ceb045b06657 100755
--- a/tests/scripts/task_python_adreno.sh
+++ b/tests/scripts/task_python_adreno.sh
@@ -18,8 +18,9 @@
 
 set -euxo pipefail
 
-export TVM_TEST_TARGETS="opencl"
-export TVM_RELAY_OPENCL_TEXTURE_TARGETS="opencl -device=adreno"
+export TVM_TEST_TARGETS="opencl;vulkan"
+export TVM_RELAX_TEXTURE_TARGETS="opencl -device=adreno;vulkan -device=adreno"
+
 source tests/scripts/setup-pytest-env.sh
 export LD_LIBRARY_PATH="build:${LD_LIBRARY_PATH:-}"
diff --git a/tests/scripts/task_python_unittest_gpuonly.sh b/tests/scripts/task_python_unittest_gpuonly.sh
index 776d29fda07f..217c6dbd2755 100755
--- a/tests/scripts/task_python_unittest_gpuonly.sh
+++ b/tests/scripts/task_python_unittest_gpuonly.sh
@@ -38,7 +38,7 @@ run_pytest ${TVM_UNITTEST_TESTSUITE_NAME}-1 tests/python/codegen/test_target_cod
 
 # Adreno : A comprehensive Texture tests on Nvidia GPU and clml codegen tests.
 export PYTEST_ADDOPTS=""
-export TVM_TEST_TARGETS="opencl"
+export TVM_TEST_TARGETS="opencl;vulkan -from_device=0"
 export TVM_UNITTEST_TESTSUITE_NAME=python-codegen-clml-texture
 
 source tests/scripts/setup-pytest-env.sh
diff --git a/tests/scripts/task_vulkan_cpp_unittest.sh b/tests/scripts/task_vulkan_cpp_unittest.sh
new file mode 100644
index 000000000000..79debce955bc
--- /dev/null
+++ b/tests/scripts/task_vulkan_cpp_unittest.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -euxo pipefail
+
+if [ $# -gt 0 ]; then
+    BUILD_DIR="$1"
+elif [ -n "${TVM_BUILD_PATH:-}" ]; then
+    # TVM_BUILD_PATH may contain multiple space-separated paths.  If
+    # so, use the first one.
+    BUILD_DIR=$(IFS=" "; set -- $TVM_BUILD_PATH; echo $1)
+else
+    BUILD_DIR=build
+fi
+
+# to avoid CI thread throttling.
+export TVM_BIND_THREADS=0
+export OMP_NUM_THREADS=1
+
+pushd "${BUILD_DIR}"
+# run cpp test executable
+./vulkan-cpptest
+popd
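
Usage sketch (illustrative, not part of the patch): assuming a Vulkan-capable device and a local googletest source checkout (/path/to/googletest is a placeholder), the new gtest-based runtime tests added above could be configured, built, and run roughly as follows, mirroring the config.cmake convention the CI scripts use.

    # Enable Vulkan and point USE_VULKAN_GTEST at a googletest source tree.
    mkdir -p build && cp cmake/config.cmake build/
    echo 'set(USE_VULKAN ON)' >> build/config.cmake
    echo 'set(USE_VULKAN_GTEST /path/to/googletest)' >> build/config.cmake
    # Build TVM, then run the Vulkan C++ unit tests (including the
    # vulkan-cpptest binary) via the helper script introduced in this patch.
    cd build && cmake .. && make -j"$(nproc)" && cd ..
    ./tests/scripts/task_vulkan_cpp_unittest.sh build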