
Commit dd54371
fireicewolf committed Oct 5, 2024
1 parent d9a12c9 commit dd54371
Showing 9 changed files with 523 additions and 343 deletions.
37 changes: 25 additions & 12 deletions README.md
@@ -1,35 +1,41 @@
# Joy Caption Cli

A Python-based CLI tool for tagging images
with [joy-caption-pre-alpha](https://huggingface.co/spaces/fancyfeast/joy-caption-pre-alpha) models.

### Only CUDA devices are supported at the moment.

## Introduction

I made this repo because I want to caption images cross-platform (on my old MBP, my Windows gaming PC, or a
Docker-based Linux cloud server such as Google Colab).

But I don't want to install a huge WebUI just for this small job, and some cloud services are unfriendly to
Gradio-based UIs.

So this repo was born.

## Model source

HuggingFace hosts the original models; the ModelScope repos are pure forks of the HuggingFace ones (because
HuggingFace is blocked in some places).

| Model | HuggingFace Link | ModelScope Link |
|:---------------------------------:|:-----------------------------------------------------------------------------:|:------------------------------------------------------------------------------------:|
| joy-caption-pre-alpha | [HuggingFace](https://huggingface.co/spaces/fancyfeast/joy-caption-pre-alpha) | [ModelScope](https://www.modelscope.cn/models/fireicewolf/joy-caption-pre-alpha) |
| siglip-so400m-patch14-384(Google) | [HuggingFace](https://huggingface.co/google/siglip-so400m-patch14-384) | [ModelScope](https://www.modelscope.cn/models/fireicewolf/siglip-so400m-patch14-384) |
| Meta-Llama-3.1-8B | [HuggingFace](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) | [ModelScope](https://www.modelscope.cn/models/fireicewolf/Meta-Llama-3.1-8B) |
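
If HuggingFace is blocked where you run this, the downloads can be pointed at the ModelScope forks instead, e.g.:

```shell
# fetch models from ModelScope instead of HuggingFace
python caption.py your_datasets_path --model_site modelscope
```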

## TO-DO

Make a simple UI with Jupyter widgets (when I get over my laziness 😊)

## Installation

Python 3.10 works fine.

Open a shell terminal and follow the steps below:

```shell
# Clone this repo
git clone https://github.com/fireicewolf/joy-caption-cli.git
# … (lines collapsed in diff: @@ -54,16 +60,21 @@)
pip install -U -r modelscope-requirements.txt
```

## Simple usage

__Make sure your Python venv is activated first!__

```shell
python caption.py your_datasets_path
```

To run with more options, get help with the command below or see [Options](#options):

```shell
python caption.py -h
```

## <span id="options">Options</span>

<details>
<summary>Advanced options</summary>

`data_path`

path for data.

@@ -81,11 +92,12 @@

`--config CONFIG`

config json for llava models, default is "default.json"
[//]: # (`--use_cpu`)

[//]: # ()

[//]: # (Use cpu for inference.)

`--model_name MODEL_NAME`

model name for inference, default is "Joy-Caption-Pre-Alpha", please check configs/default.json

`--model_site MODEL_SITE`

download model from model site huggingface or modelscope, default is "huggingface".

@@ -142,6 +154,7 @@

`--max_tokens MAX_TOKENS`

max tokens for output, default is 300.
</details>
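
For example, a typical run combining several of these options might look like this (the dataset path is a placeholder; the flags are taken from `caption.py`):

```shell
python caption.py /path/to/your/datasets \
    --model_site modelscope \
    --llm_dtype bf16 \
    --llm_qnt 4bit \
    --max_tokens 300
```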

## Credits

Based on [joy-caption-pre-alpha](https://huggingface.co/spaces/fancyfeast/joy-caption-pre-alpha)

Without their work (👏👏), this repo wouldn't exist.
138 changes: 89 additions & 49 deletions caption.py
@@ -1,9 +1,10 @@
import argparse
import os
import time
from datetime import datetime
from pathlib import Path

from utils.download import download_models
from utils.joy import Joy
from utils.logger import Logger

@@ -60,147 +61,186 @@ def main(args):
else:
models_save_path = Path(os.path.join(Path(__file__).parent, args.models_save_path))

image_adapter_path, clip_path, llm_path = download_models(
logger=my_logger,
args=args,
config_file=config_file,
model_name=str(args.model_name),
model_site=str(args.model_site),
models_save_path=models_save_path,
use_sdk_cache=True if args.use_sdk_cache else False,
download_method=str(args.download_method)
)

# Load models
my_joy = Joy(
logger=my_logger,
args=args,
image_adapter_path=image_adapter_path,
clip_path=clip_path,
llm_path=llm_path,
use_gpu=True if not args.llm_use_cpu else False
)
my_joy.load_model()

# Inference
start_inference_time = time.monotonic()
my_joy.inference()
total_inference_time = time.monotonic() - start_inference_time
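# Break the elapsed seconds into days/hours/minutes/seconds for the log line below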
days = total_inference_time // (24 * 3600)
total_inference_time %= (24 * 3600)
hours = total_inference_time // 3600
total_inference_time %= 3600
minutes = total_inference_time // 60
seconds = total_inference_time % 60
days = f"{days} Day(s) " if days > 0 else ""
hours = f"{hours} Hour(s) " if hours > 0 or (days and hours == 0) else ""
minutes = f"{minutes} Min(s) " if minutes > 0 or (hours and minutes == 0) else ""
seconds = f"{seconds:.1f} Sec(s)"
my_logger.info(f"All work done with in {days}{hours}{minutes}{seconds}.")

# Unload models
my_joy.unload_model()


def setup_args() -> argparse.ArgumentParser:
parsed_args = argparse.ArgumentParser()
base_args = parsed_args.add_argument_group("Base")
base_args.add_argument(
'data_path',
type=str,
help='path for data.'
)
base_args.add_argument(
'--recursive',
action='store_true',
help='Include recursive dirs'
)

log_args = parsed_args.add_argument_group("Logs")
log_args.add_argument(
'--log_level',
type=str,
choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
default='INFO',
help='set log level, default is "INFO"'
)
log_args.add_argument(
'--save_logs',
action='store_true',
help='save log file.'
)

download_args = parsed_args.add_argument_group("Download")
download_args.add_argument(
'--config',
type=str,
default='default.json',
help='config json for llava models, default is "default.json"'
)
download_args.add_argument(
'--model_name',
type=str,
default='Joy-Caption-Pre-Alpha',
help='model name for inference, default is "Joy-Caption-Pre-Alpha", please check configs/default.json'
)
download_args.add_argument(
'--model_site',
type=str,
choices=['huggingface', 'modelscope'],
default='huggingface',
help='download model from model site huggingface or modelscope, default is "huggingface".'
)
download_args.add_argument(
'--models_save_path',
type=str,
default="models",
help='path to save models, default is "models".'
)
download_args.add_argument(
'--use_sdk_cache',
action='store_true',
help='use sdk\'s cache dir to store models. \
if this option enabled, "--models_save_path" will be ignored.'
)
download_args.add_argument(
'--download_method',
type=str,
choices=["SDK", "URL"],
default='SDK',
help='download method via SDK or URL, default is "SDK".'
)
download_args.add_argument(
'--force_download',
action='store_true',
help='force download even file exists.'
)
download_args.add_argument(
'--skip_download',
action='store_true',
help='skip download if exists.'
)
download_args.add_argument(
'--custom_caption_save_path',
type=str,
default=None,
help='Input custom caption file save path.'
)

inference_args = parsed_args.add_argument_group("Inference")
inference_args.add_argument(
'--llm_use_cpu',
action='store_true',
help='use cpu for inference.'
)
inference_args.add_argument(
'--llm_dtype',
type=str,
choices=["auto", "fp16", "bf16", "fp32"],
default='fp16',
help='choose joy LLM load dtype, default is `fp16`.'
)
inference_args.add_argument(
'--llm_qnt',
type=str,
choices=["none", "4bit", "8bit"],
default='none',
help='enable quantization for LLM ["none", "4bit", "8bit"], default is `none`.'
)
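# NOTE: 4bit/8bit LLM quantization typically requires the bitsandbytes package (an assumption; not shown in this diff)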
inference_args.add_argument(
'--image_size',
type=int,
default=1024,
help='resize image to a suitable size, default is 1024.'
)
inference_args.add_argument(
'--caption_extension',
type=str,
default='.txt',
help='extension of caption file, default is ".txt"'
)
inference_args.add_argument(
'--not_overwrite',
action='store_true',
help='not overwrite caption file if exist.'
)
inference_args.add_argument(
'--user_prompt',
type=str,
default=DEFAULT_USER_PROMPT,
help='user prompt for caption.'
)
inference_args.add_argument(
'--temperature',
type=float,
default=0.5,
help='temperature for Llama model.'
)
inference_args.add_argument(
'--max_tokens',
type=int,
default=300,
help='max tokens for output.'
)

return parsed_args


if __name__ == "__main__":
get_args = setup_args()
get_args = get_args.parse_args()
main(get_args)