From 76f5340d5eb2b2fc250be278db4de85069fcda3d Mon Sep 17 00:00:00 2001
From: Wenhua Cheng
Date: Tue, 30 Dec 2025 14:15:33 +0800
Subject: [PATCH 1/2] Disable CPU ark backends; split IPEX backends by device

Comment out the CPU registrations of auto_round_kernel,
auto_round_kernel_zp and auto_round_kernel_awq. Their *_xpu
counterparts stay registered with unchanged settings.

Register the IPEX backends once per device: ipex_gptq_cpu and
ipex_awq_cpu (device cpu, additionally requiring torch<2.9), and
ipex_gptq and ipex_awq (device xpu).
---
Review notes (text in this section is dropped by git am):
- ipex_awq is registered for xpu here, mirroring ipex_gptq. The submitted
  revision used device=["cpu"], which duplicated ipex_awq_cpu without its
  torch<2.9 requirement and left no IPEX AWQ entry for xpu.
- The blob ids on the "index" lines were not regenerated after that
  one-line change.

 auto_round/inference/backend.py | 121 ++++++++++++++++++++------------
 1 file changed, 76 insertions(+), 45 deletions(-)

diff --git a/auto_round/inference/backend.py b/auto_round/inference/backend.py
index 057ecf5bd..e2455aacf 100644
--- a/auto_round/inference/backend.py
+++ b/auto_round/inference/backend.py
@@ -439,21 +439,21 @@ def fp8_static_scheme_checker(
     requirements=["autoawq", "transformers"],
 )
 
-BackendInfos["auto_round_kernel"] = BackendInfo(
-    device=["cpu"],
-    sym=[True, False],
-    packing_format=GPTQ_FORMAT_NO_ZP,
-    bits=[2, 4, 8],
-    group_size=None,
-    priority=6,
-    checkers=[ark_feature_checker],
-    alias=["ark"],
-    compute_dtype=["float32", "float16"],
-    data_type=["int"],
-    act_bits=WOQ_DEFAULT_ACT_BITS,
-    requirements=["torch>=2.9.0", "auto_round_kernel"],
-    systems=["linux"],
-)
+# BackendInfos["auto_round_kernel"] = BackendInfo(
+#     device=["cpu"],
+#     sym=[True, False],
+#     packing_format=GPTQ_FORMAT_NO_ZP,
+#     bits=[2, 4, 8],
+#     group_size=None,
+#     priority=6,
+#     checkers=[ark_feature_checker],
+#     alias=["ark"],
+#     compute_dtype=["float32", "float16"],
+#     data_type=["int"],
+#     act_bits=WOQ_DEFAULT_ACT_BITS,
+#     requirements=["torch>=2.9.0", "auto_round_kernel"],
+#     systems=["linux"],
+# )
 
 BackendInfos["auto_round_kernel_xpu"] = BackendInfo(
     device=["xpu"],
@@ -471,11 +471,27 @@ def fp8_static_scheme_checker(
     systems=["linux"],
 )
 
-BackendInfos["auto_round_kernel_zp"] = BackendInfo(
-    device=["cpu"],
-    sym=[True, False],
+# BackendInfos["auto_round_kernel_zp"] = BackendInfo(
+#     device=["cpu"],
+#     sym=[True, False],
+#     packing_format=GPTQ_FORMAT,
+#     bits=[2, 4, 8],
+#     group_size=None,
+#     priority=6,
+#     checkers=[ark_feature_checker],
+#     alias=["ark"],
+#     compute_dtype=["float32", "float16"],
+#     data_type=["int"],
+#     act_bits=WOQ_DEFAULT_ACT_BITS,
+#     requirements=["torch>=2.9.0", "auto_round_kernel"],
+#     systems=["linux"],
+# )
+
+BackendInfos["auto_round_kernel_zp_xpu"] = BackendInfo(
+    device=["xpu"],
+    sym=[True],
     packing_format=GPTQ_FORMAT,
-    bits=[2, 4, 8],
+    bits=[4, 8],
     group_size=None,
     priority=6,
     checkers=[ark_feature_checker],
@@ -487,10 +503,26 @@ def fp8_static_scheme_checker(
     systems=["linux"],
 )
 
-BackendInfos["auto_round_kernel_zp_xpu"] = BackendInfo(
+# BackendInfos["auto_round_kernel_awq"] = BackendInfo(
+#     device=["cpu"],
+#     sym=[True, False],
+#     packing_format=AWQ_FORMAT,
+#     bits=[2, 4, 8],
+#     group_size=None,
+#     priority=6,
+#     checkers=[ark_feature_checker],
+#     alias=["ark"],
+#     compute_dtype=["float32", "float16"],
+#     data_type=["int"],
+#     act_bits=WOQ_DEFAULT_ACT_BITS,
+#     requirements=["torch>=2.9.0", "auto_round_kernel"],
+#     systems=["linux"],
+# )
+
+BackendInfos["auto_round_kernel_awq_xpu"] = BackendInfo(
     device=["xpu"],
     sym=[True],
-    packing_format=GPTQ_FORMAT,
+    packing_format=AWQ_FORMAT,
     bits=[4, 8],
     group_size=None,
     priority=6,
@@ -503,42 +535,40 @@ def fp8_static_scheme_checker(
     systems=["linux"],
 )
 
-BackendInfos["auto_round_kernel_awq"] = BackendInfo(
+BackendInfos["ipex_gptq_cpu"] = BackendInfo(
     device=["cpu"],
     sym=[True, False],
-    packing_format=AWQ_FORMAT,
-    bits=[2, 4, 8],
+    packing_format=GPTQ_FORMAT,
+    bits=[4],
     group_size=None,
-    priority=6,
-    checkers=[ark_feature_checker],
-    alias=["ark"],
-    compute_dtype=["float32", "float16"],
+    priority=5,
+    checkers=[],
+    compute_dtype=["float16", "bfloat16"],
     data_type=["int"],
     act_bits=WOQ_DEFAULT_ACT_BITS,
-    requirements=["torch>=2.9.0", "auto_round_kernel"],
-    systems=["linux"],
+    alias=["ipex"],
+    requirements=["torch<2.9", "intel-extension-for-pytorch>=2.5"],
 )
 
-BackendInfos["auto_round_kernel_awq_xpu"] = BackendInfo(
+BackendInfos["ipex_gptq"] = BackendInfo(
     device=["xpu"],
-    sym=[True],
-    packing_format=AWQ_FORMAT,
-    bits=[4, 8],
+    sym=[True, False],
+    packing_format=GPTQ_FORMAT,
+    bits=[4],
     group_size=None,
-    priority=6,
-    checkers=[ark_feature_checker],
-    alias=["ark"],
-    compute_dtype=["float32", "float16"],
+    priority=5,
+    checkers=[],
+    compute_dtype=["float16", "bfloat16"],
     data_type=["int"],
     act_bits=WOQ_DEFAULT_ACT_BITS,
-    requirements=["torch>=2.9.0", "auto_round_kernel"],
-    systems=["linux"],
+    alias=["ipex"],
+    requirements=["intel-extension-for-pytorch>=2.5"],
 )
 
-BackendInfos["ipex_gptq"] = BackendInfo(
-    device=["cpu", "xpu"],
+BackendInfos["ipex_awq_cpu"] = BackendInfo(
+    device=["cpu"],
     sym=[True, False],
-    packing_format=GPTQ_FORMAT,
+    packing_format=AWQ_FORMAT,
     bits=[4],
     group_size=None,
     priority=5,
@@ -547,11 +577,12 @@ def fp8_static_scheme_checker(
     data_type=["int"],
     act_bits=WOQ_DEFAULT_ACT_BITS,
     alias=["ipex"],
-    requirements=["intel-extension-for-pytorch>=2.5"],
+    requirements=["torch<2.9","intel-extension-for-pytorch>=2.5"],
 )
 
+
 BackendInfos["ipex_awq"] = BackendInfo(
-    device=["cpu", "xpu"],
+    device=["xpu"],
     sym=[True, False],
     packing_format=AWQ_FORMAT,
     bits=[4],

From 121b999b090d9a88604e3fc1c54bc36a1415a3cf Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 30 Dec 2025 06:16:50 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 auto_round/inference/backend.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/auto_round/inference/backend.py b/auto_round/inference/backend.py
index e2455aacf..3b243bff0 100644
--- a/auto_round/inference/backend.py
+++ b/auto_round/inference/backend.py
@@ -577,7 +577,7 @@ def fp8_static_scheme_checker(
     data_type=["int"],
     act_bits=WOQ_DEFAULT_ACT_BITS,
     alias=["ipex"],
-    requirements=["torch<2.9","intel-extension-for-pytorch>=2.5"],
+    requirements=["torch<2.9", "intel-extension-for-pytorch>=2.5"],
 )
 
 