|
# Step1: scrape https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/models/registry.py
# Step2: upload to https://huggingface.co/datasets/huggingface/vllm-metadata
name: Daily vLLM Metadata Scraper

on:
  # NOTE(review): an unfiltered push trigger runs this job on every push to
  # every branch, in addition to the daily schedule - confirm this is intended.
  push:
  schedule:
    # Runs at 00:00 UTC every day
    - cron: "0 0 * * *"

jobs:
  run-python-script:
    runs-on: ubuntu-latest

    steps:
      # v4 / v5 of the official actions run on a supported Node runtime;
      # the previously pinned v3 / v4 rely on the deprecated Node 16 runtime.
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests huggingface-hub
| 28 | +
|
| 29 | + - name: Execute Python script |
| 30 | + env: |
| 31 | + HF_TOKEN: ${{ secrets.HF_TOKEN }} |
| 32 | + run: | |
| 33 | + python -c ' |
| 34 | + import os |
| 35 | + import ast |
| 36 | + import json |
| 37 | + import requests |
| 38 | + from huggingface_hub import HfApi |
| 39 | +
|
def extract_models_sub_dict(parsed_code, sub_dict_name):
    """Return the literal value assigned to ``sub_dict_name`` in ``parsed_code``.

    Both plain assignments (``NAME = {...}``) and annotated assignments
    (``NAME: SomeType = {...}``) are recognised.

    Args:
        parsed_code: AST (as produced by ``ast.parse``) to search.
        sub_dict_name: Name of the variable whose assigned value is wanted.

    Returns:
        The assigned value, evaluated with ``ast.literal_eval``, or ``None``
        when no assignment to ``sub_dict_name`` is found. If the name is
        assigned more than once, the last assignment visited wins.

    Raises:
        ValueError: If the assigned expression is not a plain literal
            (other errors documented for ``ast.literal_eval`` may also
            propagate).
    """

    class SubDictVisitor(ast.NodeVisitor):
        """Record the literal assigned to the requested variable name."""

        def __init__(self):
            self.key = sub_dict_name
            self.value = None

        def _is_wanted(self, target):
            # Only simple names can match; attribute, subscript and tuple
            # targets are never the variable we are looking for.
            return isinstance(target, ast.Name) and target.id == self.key

        def visit_Assign(self, node):
            if any(self._is_wanted(target) for target in node.targets):
                self.value = ast.literal_eval(node.value)

        def visit_AnnAssign(self, node):
            # Annotated form: NAME: annotation = value. A bare declaration
            # without a value (node.value is None) has nothing to evaluate.
            if node.value is not None and self._is_wanted(node.target):
                self.value = ast.literal_eval(node.value)

    visitor = SubDictVisitor()
    visitor.visit(parsed_code)
    return visitor.value
| 54 | +
|
def extract_models_dict(source_code):
    """Build the merged model registry assigned to ``_MODELS`` in ``source_code``.

    The ``_MODELS`` assignment is expected to be a dict display made of
    ``**NAME`` unpack entries (and, optionally, literal ``key: value``
    entries). Each unpacked name is resolved through
    ``extract_models_sub_dict`` and merged in source order, so later
    entries override earlier ones on duplicate keys.

    Args:
        source_code: Python source text to parse.

    Returns:
        A dict with the merged contents; empty when the source contains
        no assignment to ``_MODELS``.

    Raises:
        SyntaxError: If ``source_code`` is not valid Python.
        ValueError: If ``_MODELS`` is not a dict display, if an unpacked
            entry is not a simple name, or if a referenced sub-dictionary
            is not assigned anywhere in the source.
    """
    parsed_code = ast.parse(source_code)

    class ModelsDictVisitor(ast.NodeVisitor):
        """Merge every sub-dictionary referenced by the ``_MODELS`` assignment."""

        def __init__(self):
            self.key = "_MODELS"
            self.value = {}

        def _merge(self, dict_node):
            if not isinstance(dict_node, ast.Dict):
                raise ValueError(
                    f"{self.key} is expected to be a dict display, "
                    f"found {type(dict_node).__name__}"
                )
            # In ast.Dict a key of None marks a ``**mapping`` unpack entry;
            # the unpacked expression sits at the same index in ``values``.
            for entry_key, entry_value in zip(dict_node.keys, dict_node.values):
                if entry_key is not None:
                    literal_key = ast.literal_eval(entry_key)
                    self.value[literal_key] = ast.literal_eval(entry_value)
                    continue
                if not isinstance(entry_value, ast.Name):
                    raise ValueError(
                        f"unsupported unpacked entry in {self.key}: "
                        f"{type(entry_value).__name__}"
                    )
                sub_dict = extract_models_sub_dict(parsed_code, entry_value.id)
                if sub_dict is None:
                    # Fail loudly instead of letting dict.update(None)
                    # raise an opaque TypeError.
                    raise ValueError(
                        f"sub-dictionary {entry_value.id!r} referenced by "
                        f"{self.key} is not assigned in the source"
                    )
                self.value.update(sub_dict)

        def _is_registry(self, target):
            return isinstance(target, ast.Name) and target.id == self.key

        def visit_Assign(self, node):
            # Inspect every target: a non-name target (attribute, subscript,
            # tuple) must not stop the scan of the remaining ones.
            for target in node.targets:
                if self._is_registry(target):
                    self._merge(node.value)

        def visit_AnnAssign(self, node):
            # Annotated form: _MODELS: annotation = {...}.
            if node.value is not None and self._is_registry(node.target):
                self._merge(node.value)

    visitor = ModelsDictVisitor()
    visitor.visit(parsed_code)
    return visitor.value
| 72 | +
|
# Fetch the raw vLLM model registry source from GitHub.
url = "https://raw.githubusercontent.com/vllm-project/vllm/refs/heads/main/vllm/model_executor/models/registry.py"
# Without a timeout, requests waits indefinitely on a stalled connection,
# which would leave the scheduled job hanging until the runner limit.
response = requests.get(url, timeout=30)
response.raise_for_status()  # Raise an exception for bad status codes
source_code = response.text

models_dict = extract_models_dict(source_code)
# NOTE(review): this flattens the VALUES of the registry, so the list holds
# every element of each value tuple rather than the dict keys - confirm
# that this is the intended content of the published list.
architectures = [item for tup in models_dict.values() for item in tup]
architectures_json_str = json.dumps(architectures, indent=4)
# UTF-8 encoded payload; presumably consumed by the upload step - verify.
json_bytes = architectures_json_str.encode("utf-8")
| 82 | + print(architectures_json_str)' |
0 commit comments