Skip to content

Commit 309da1c

Browse files
committed
vLLM metadata script
1 parent 7aa8967 commit 309da1c

File tree

1 file changed

+82
-0
lines changed

1 file changed

+82
-0
lines changed

.github/workflows/vllm-metadata.yml

+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Step1: scrape https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/models/registry.py
2+
# Step2: upload to https://huggingface.co/datasets/huggingface/vllm-metadata
3+
name: Daily vLLM Metadata Scraper
4+
5+
on:
6+
push:
7+
schedule:
8+
# Runs at 00:00 UTC every day
9+
- cron: "0 0 * * *"
10+
11+
jobs:
12+
run-python-script:
13+
runs-on: ubuntu-latest
14+
15+
steps:
16+
- name: Checkout repository
17+
uses: actions/checkout@v3
18+
19+
- name: Set up Python
20+
uses: actions/setup-python@v4
21+
with:
22+
python-version: "3.10"
23+
24+
- name: Install dependencies
25+
run: |
26+
python -m pip install --upgrade pip
27+
pip install requests huggingface-hub
28+
29+
- name: Execute Python script
30+
env:
31+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
32+
run: |
33+
python -c '
34+
import os
35+
import ast
36+
import json
37+
import requests
38+
from huggingface_hub import HfApi
39+
40+
def extract_models_sub_dict(parsed_code, sub_dict_name):
    """Return the literal value assigned to ``sub_dict_name`` in ``parsed_code``.

    Both plain assignments (``NAME = {...}``) and annotated assignments
    (``NAME: Dict[...] = {...}``) are recognised.

    Args:
        parsed_code: AST of the module to search, as returned by ``ast.parse``.
        sub_dict_name: Name of the variable whose assigned literal is wanted.

    Returns:
        The assigned value evaluated with ``ast.literal_eval``, or ``None``
        when no assignment to ``sub_dict_name`` is found. If the name is
        assigned more than once, the last assignment visited wins.

    Raises:
        ValueError: If the assigned expression is not a pure literal.
    """

    class ModelsSubDictVisitor(ast.NodeVisitor):
        def __init__(self):
            self.key = sub_dict_name
            self.value = None

        def _is_wanted(self, target):
            return isinstance(target, ast.Name) and target.id == self.key

        def visit_Assign(self, node):
            # Chained assignments (a = NAME = {...}) carry several targets.
            if any(self._is_wanted(target) for target in node.targets):
                self.value = ast.literal_eval(node.value)

        def visit_AnnAssign(self, node):
            # A bare annotation (NAME: Dict[...]) has no value and assigns
            # nothing, so it is ignored.
            if node.value is not None and self._is_wanted(node.target):
                self.value = ast.literal_eval(node.value)

    visitor = ModelsSubDictVisitor()
    visitor.visit(parsed_code)
    return visitor.value
54+
55+
def extract_models_dict(source_code):
    """Build the merged registry defined by ``_MODELS`` in ``source_code``.

    ``_MODELS`` is expected to be assigned a dict display. Each ``**NAME``
    entry is resolved with ``extract_models_sub_dict`` against the same
    source; each inline ``key: value`` entry is evaluated as a literal.
    Entries are merged in source order, so later entries override earlier
    ones.

    Args:
        source_code: Python source text to parse.

    Returns:
        The merged dict, or an empty dict when ``_MODELS`` is never assigned.

    Raises:
        SyntaxError: If ``source_code`` is not valid Python.
        ValueError: If ``_MODELS`` is not a dict display, or one of its
            entries cannot be resolved to a literal.
    """
    parsed_code = ast.parse(source_code)

    class ModelsDictVisitor(ast.NodeVisitor):
        def __init__(self):
            self.key = "_MODELS"
            self.value = {}

        def visit_Assign(self, node):
            # Inspect every target: in a chained assignment such as
            # a.b = _MODELS = {...} a non-name target must not hide the name.
            is_models = any(
                isinstance(target, ast.Name) and target.id == self.key
                for target in node.targets
            )
            if not is_models:
                return
            if not isinstance(node.value, ast.Dict):
                raise ValueError(
                    "{} must be assigned a dict display".format(self.key)
                )
            # In ast.Dict a ** unpacking has key None and the unpacked
            # expression stored in the matching values slot.
            for key_node, value_node in zip(node.value.keys, node.value.values):
                if key_node is not None:
                    entry_key = ast.literal_eval(key_node)
                    self.value[entry_key] = ast.literal_eval(value_node)
                    continue
                if not isinstance(value_node, ast.Name):
                    raise ValueError(
                        "unsupported ** entry in {}".format(self.key)
                    )
                sub_dict = extract_models_sub_dict(parsed_code, value_node.id)
                if sub_dict is None:
                    raise ValueError(
                        "no literal assignment found for {}".format(value_node.id)
                    )
                self.value.update(sub_dict)

    visitor = ModelsDictVisitor()
    visitor.visit(parsed_code)
    return visitor.value
72+
73+
# Raw (non-HTML) view of registry.py on the vLLM main branch.
url = "https://raw.githubusercontent.com/vllm-project/vllm/refs/heads/main/vllm/model_executor/models/registry.py"
# Bound the request so a stalled connection fails the job instead of hanging it.
response = requests.get(url, timeout=30)
response.raise_for_status()  # Raise an exception for bad status codes
source_code = response.text

models_dict = extract_models_dict(source_code)
# Flatten the items of every registry value into a single list.
architectures = [item for tup in models_dict.values() for item in tup]
architectures_json_str = json.dumps(architectures, indent=4)
# UTF-8 payload of the JSON text; nothing in the visible script consumes it yet.
json_bytes = architectures_json_str.encode("utf-8")
82+
print(architectures_json_str)'

0 commit comments

Comments
 (0)