@@ -311,61 +311,13 @@ def setup_eval_parser():
     return args


-def _gguf_args_check(args):
-    from auto_round.utils import logger
-    from auto_round.export.export_to_gguf.config import GGUF_CONFIG
-
-    formats = args.format.lower().replace(' ', '').split(",")
-    for format in GGUF_CONFIG:
-        if format in formats:
-            from pathlib import Path
-            from auto_round.export.export_to_gguf.convert import Model
-            hparams = Model.load_hparams(Path(args.model))
-            model_architecture = hparams["architectures"][0]
-            try:
-                model_class = Model.from_model_architecture(model_architecture)
-            except NotImplementedError:
-                logger.error(f"Model {model_architecture} is not supported to export GGUF format")
-                sys.exit(1)
-
-            if format.endswith("_k") and ("hidden_size" in hparams and hparams["hidden_size"] % 256 != 0):
-                model_name = args.model.split('/')
-                model_name = model_name[-1] if model_name[-1] else model_name[-2]
-                hidden_size = hparams["hidden_size"]
-                logger.error(
-                    f"Currently only support pure mode for format: {format}. "
-                    f"{model_name} is not supported, cause hidden_size({hidden_size}) % 256 != 0")
-                sys.exit(-1)
-
-            unsupport_list, reset_list = [], []
-            gguf_config = GGUF_CONFIG[format]
-            for k, v in gguf_config.items():
-                if getattr(args, k) != v:
-                    unsupport_list.append(f"{k}={getattr(args, k)}")
-                    reset_list.append(f"{k}={v}")
-                    setattr(args, k, v)
-            if len(unsupport_list) > 0:
-                if len(formats) > 1:
-                    logger.error(
-                        f"format {format} not support for {', '.join(unsupport_list)},"
-                        f" please reset to {', '.join(reset_list)}, and retry")
-                    exit(-1)
-                else:
-                    logger.error(
-                        f"format {format} not support for {', '.join(unsupport_list)},"
-                        f" reset to {', '.join(reset_list)}.")
-            logger.info(f"export format {format}, sym = {not args.asym}, group_size = {args.group_size}")
-
-    return args
-
-
 def tune(args):
     import transformers

     from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel, AutoConfig

     from auto_round.utils import detect_device, get_library_version
-    from auto_round.utils import logger
+    from auto_round.utils import logger, _gguf_args_check

     tasks = args.tasks
     if args.format is None:
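
Note: after this move, _gguf_args_check keeps the same contract: it takes the parsed CLI args, validates each requested gguf format against GGUF_CONFIG (resetting any conflicting quantization options on args, or exiting on unsupported model architectures), and returns the adjusted args. A minimal sketch of the intended usage; run_tune is a hypothetical wrapper, not this PR's exact call site:

    from auto_round.utils import _gguf_args_check

    def run_tune(args):  # hypothetical wrapper, for illustration only
        # Must run before quantization starts, since the helper may
        # overwrite options such as group_size or sym to match the
        # requirements of the chosen gguf:* format.
        args = _gguf_args_check(args)
        return args
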
@@ -602,9 +554,12 @@ def tune(args):
     if args.act_bits <= 8 or eval_gguf_model:
         if eval_gguf_model:
+            # the gguf folder only contains one file
             for file in os.listdir(eval_folder):
                 gguf_file = file
-            user_model = AutoModelForCausalLM.from_pretrained(eval_folder, gguf_file=gguf_file, device_map="auto")
+            model = AutoModelForCausalLM.from_pretrained(
+                eval_folder, gguf_file=gguf_file, device_map="auto" if use_auto_mapping else None)
+            tokenizer = AutoTokenizer.from_pretrained(eval_folder, gguf_file=gguf_file)
         else:
             if hasattr(model, "hf_device_map") and len(model.hf_device_map) > 1:
                 from accelerate.big_modeling import dispatch_model
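
The new loading path relies on transformers' GGUF integration: from_pretrained(..., gguf_file=...) dequantizes the checkpoint back to torch tensors, and the tokenizer is rebuilt from metadata embedded in the same .gguf file, which is why the exported folder needs no separate tokenizer files. A standalone sketch of the pattern, with placeholder folder and file names:

    from transformers import AutoModelForCausalLM, AutoTokenizer

    folder = "./model-gguf"          # exported folder holding a single .gguf file (placeholder)
    gguf_file = "model-Q4_0.gguf"    # placeholder filename
    # Weights are dequantized to torch tensors on load; the tokenizer is
    # reconstructed from the metadata inside the same gguf file.
    model = AutoModelForCausalLM.from_pretrained(folder, gguf_file=gguf_file)
    tokenizer = AutoTokenizer.from_pretrained(folder, gguf_file=gguf_file)
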
@@ -616,7 +571,8 @@ def tune(args):
                 user_model = model.to(device_str)

         if args.eval_task_by_task:
-            eval_task_by_task(user_model, device=device_str, tasks=args.tasks, batch_size=args.eval_bs)
+            eval_task_by_task(
+                user_model, tokenizer=tokenizer, device=device_str, tasks=args.tasks, batch_size=args.eval_bs)
         else:
             if args.eval_bs is None or args.eval_bs == "auto":
                 logger.warning("This API does not support auto currently, reset eval_bs to 16")
@@ -660,7 +616,8 @@ def eval(args):
     print(make_table(res))


-def eval_task_by_task(model, device, tasks, tokenizer=None, batch_size=None, max_batch_size=64, trust_remote_code=True):
+def eval_task_by_task(
+        model, device=None, tasks=None, tokenizer=None, batch_size=None, max_batch_size=64, trust_remote_code=True):
     set_cuda_visible_devices(device)
     device_str, parallelism = get_device_and_parallelism(device)