@@ -126,7 +126,7 @@ def feature_num_greater_checker(in_feature, out_feature, num):
126
126
bits = [2 , 4 , 8 ],
127
127
priority = 1 , feature_checks = [feature_multiply_checker_32 ],
128
128
alias = ["auto_round" , "tritonv2" ],
129
- requirements = ["auto-round>=0.2 " ]
129
+ requirements = ["auto-round>=0.5.0 " ]
130
130
)
131
131
132
132
BackendInfos ['auto_round:tritonv2_zp' ] = BackendInfo (device = ["cuda" ], sym = [True ], ## asym has accuracy issue
@@ -135,7 +135,7 @@ def feature_num_greater_checker(in_feature, out_feature, num):
135
135
bits = [2 , 4 , 8 ],
136
136
priority = 1 , feature_checks = [feature_multiply_checker_32 ],
137
137
alias = ["tritonv2" , "tritonv2_zp" ],
138
- requirements = ["auto-round>=0.5" ]
138
+ requirements = ["auto-round>=0.5.0 " ]
139
139
)
140
140
141
141
BackendInfos ['gptqmodel:marlin' ] = BackendInfo (device = ["cuda" ], sym = [True ],
@@ -145,7 +145,7 @@ def feature_num_greater_checker(in_feature, out_feature, num):
145
145
dtype = ["float16" , "bfloat16" ],
146
146
priority = 6 , feature_checks = [in_output_feature_multiply_checker_32 ],
147
147
alias = ["marlin" , "gptqmodel" ],
148
- requirements = ["gptqmodel>=2.0" ]
148
+ requirements = ["gptqmodel>=2.0" ],
149
149
)
150
150
151
151
BackendInfos ['gptqmodel:marlin_zp' ] = BackendInfo (device = ["cuda" ], sym = [True ],
@@ -504,7 +504,7 @@ def get_autogptq_infer_linear(backend, bits=4, group_size=128, sym=False):
504
504
return QuantLinear
505
505
506
506
507
- def find_backend (target_backend : str , orig_backend : str = None ) -> str | None :
507
+ def find_backend (target_backend : str , orig_backend : str = None ):
508
508
"""
509
509
Finds the matching backend key based on the target backend name or its aliases.
510
510
@@ -620,7 +620,10 @@ def get_layer_backend(device, backend, orig_backend, bits, group_size, sym, in_f
620
620
try :
621
621
require_version (requirement )
622
622
except ImportError :
623
- logger .error (f"pip install '{ requirement } ' " )
623
+ if "gptqmodel" in requirement :
624
+ logger .error (f"pip install -v '{ requirement } ' --no-build-isolation" )
625
+ else :
626
+ logger .error (f"pip install '{ requirement } ' " )
624
627
else :
625
628
str_info = requirement ()[1 ]
626
629
logger .error (str_info )
@@ -633,3 +636,63 @@ def get_layer_backend(device, backend, orig_backend, bits, group_size, sym, in_f
633
636
reverse = True )
634
637
635
638
return supported_backends [0 ]
639
+
640
+
641
def get_highest_priority_backend(bits, sym, group_size, device, packing_format):
    """Return the key of the best-ranked backend that fits a quantization config.

    Every entry of the module-level ``BackendInfos`` registry is screened in
    turn. An entry qualifies when all of the following hold:

    * ``device`` is listed in its ``device`` attribute,
    * ``bits`` is listed in its ``bits`` attribute,
    * its ``group_size`` attribute is ``None`` or contains ``group_size``,
    * ``sym`` is listed in its ``sym`` attribute,
    * ``packing_format`` equals its ``packing_format`` or is listed in its
      ``convertable_format``.

    Args:
        bits: Requested bit-width.
        sym: Whether symmetric quantization is requested.
        group_size: Requested quantization group size.
        device: Target device name.
        packing_format: Packing format of the checkpoint being loaded.

    Returns:
        The registry key of the qualifying backend with the largest
        ``priority``; on equal priority the one that comes first in the
        registry's iteration order is chosen. ``None`` if nothing qualifies.
    """

    def _fits(info):
        # Checks run in a fixed order: device, bits, group size, sym, format.
        if device not in info.device:
            return False
        if bits not in info.bits:
            return False
        # A backend with no group_size restriction accepts any value.
        if info.group_size is not None and group_size not in info.group_size:
            return False
        if sym not in info.sym:
            return False
        # The format must match natively or be convertible to the backend's.
        return packing_format == info.packing_format or packing_format in info.convertable_format

    candidates = [name for name, info in BackendInfos.items() if _fits(info)]
    if not candidates:
        return None
    # max() returns the first maximal element, so ties resolve to the
    # earliest candidate, same as a stable descending sort followed by [0].
    return max(candidates, key=lambda name: BackendInfos[name].priority)
676
+
677
+
678
def process_requirement(requirements: list):
    """Build copy-pasteable ``pip install`` commands for missing requirements.

    Requirement strings that mention ``gptqmodel`` get their own command
    (verbose, ``--no-build-isolation``) followed by a ``numpy<2.0`` install
    command. If several entries mention ``gptqmodel``, only the last one is
    used. All remaining requirements are combined into one ``pip install``
    command.

    Each specifier is wrapped in single quotes so that comparison operators
    such as ``>=`` or ``<`` are not interpreted by the shell as redirections
    when the user pastes the command. Surrounding whitespace is stripped and
    blank entries are ignored.

    Args:
        requirements: Requirement specifier strings, e.g.
            ``["auto-round>=0.5.0", "gptqmodel>=2.0"]``.

    Returns:
        A list of shell command strings, in the order they should be run.
        Empty when ``requirements`` contains nothing to install.
    """
    gptqmodel_requirement = None
    other_requirements = []
    for requirement in requirements:
        requirement = requirement.strip()
        if not requirement:
            continue
        if "gptqmodel" in requirement:
            gptqmodel_requirement = requirement
        else:
            other_requirements.append(requirement)

    infos = []

    if gptqmodel_requirement is not None:
        infos.append(f"pip install -v '{gptqmodel_requirement}' --no-build-isolation")
        infos.append("pip install 'numpy<2.0'")

    if other_requirements:
        # Quote every specifier: an unquoted "pkg>=1.0" would make the shell
        # redirect pip's output into a file named "=1.0".
        quoted = " ".join(f"'{requirement}'" for requirement in other_requirements)
        infos.append(f"pip install {quoted}")
    return infos
0 commit comments