diff --git a/auto_round/inference/backend.py b/auto_round/inference/backend.py
index 3b243bff0..7b1783998 100644
--- a/auto_round/inference/backend.py
+++ b/auto_round/inference/backend.py
@@ -582,7 +582,7 @@ def fp8_static_scheme_checker(

 BackendInfos["ipex_awq"] = BackendInfo(
-    device=["cpu"],
+    device=["xpu"],
     sym=[True, False],
     packing_format=AWQ_FORMAT,
     bits=[4],
diff --git a/test/test_cuda/test_auto_round_format.py b/test/test_cuda/test_auto_round_format.py
index 9604ffff1..821d45fa1 100644
--- a/test/test_cuda/test_auto_round_format.py
+++ b/test/test_cuda/test_auto_round_format.py
@@ -158,7 +158,7 @@ def test_autoround_gptq_sym_format(self, tiny_opt_model_path, dataloader):

         from transformers import AutoRoundConfig

-        quantization_config = AutoRoundConfig(backend="ipex_gptq")
+        quantization_config = AutoRoundConfig(backend="ipex")
         model = AutoModelForCausalLM.from_pretrained(
             quantized_model_path, device_map="cpu", trust_remote_code=True, quantization_config=quantization_config