Skip to content

Commit 7de8a70

Browse files
committed
Fix Big Query feature
1 parent e6aaca5 commit 7de8a70

File tree

57 files changed

+4336
-9
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+4336
-9
lines changed

benchmarks/benchmark_db_utils.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -139,10 +139,10 @@ def write_run(
139139

140140
# pylint: disable=import-outside-toplevel
141141

142-
from benchmark_db_writer import bq_writer_utils
143-
from benchmark_db_writer import dataclass_bigquery_writer
144-
from benchmark_db_writer.run_summary_writer import sample_run_summary_writer
145-
from benchmark_db_writer.schema.workload_benchmark_v2 import workload_benchmark_v2_schema
142+
from benchmarks.benchmark_db_writer import bq_writer_utils
143+
from benchmarks.benchmark_db_writer import dataclass_bigquery_writer
144+
from benchmarks.benchmark_db_writer.run_summary_writer import run_summary_writer
145+
from benchmarks.benchmark_db_writer.schema.workload_benchmark_v2 import workload_benchmark_v2_schema
146146

147147
def get_db_client(
148148
project: str, dataset: str, table: str, dataclass_type: Type, is_test: bool = False
@@ -168,9 +168,9 @@ def get_db_client(
168168
print(options.model_id)
169169

170170
if (
171-
sample_run_summary_writer.validate_model_id(options.model_id, options.is_test)
172-
and sample_run_summary_writer.validate_hardware_id(options.hardware_id, options.is_test)
173-
and sample_run_summary_writer.validate_software_id(options.software_id, options.is_test)
171+
run_summary_writer.validate_model_id(options.model_id, options.is_test)
172+
and run_summary_writer.validate_hardware_id(options.hardware_id, options.is_test)
173+
and run_summary_writer.validate_software_id(options.software_id, options.is_test)
174174
):
175175
summary = workload_benchmark_v2_schema.WorkloadBenchmarkV2Schema(
176176
run_id=f"run-{uuid.uuid4()}",
@@ -179,6 +179,7 @@ def get_db_client(
179179
hardware_id=options.hardware_id,
180180
hardware_num_chips=number_of_chips,
181181
hardware_num_nodes=number_of_nodes,
182+
hardware_num_slices=options.hardware_num_slices,
182183
result_success=run_success,
183184
configs_framework=framework_config_in_json,
184185
configs_env=env_variables,
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
import datetime
2+
3+
# Development version string: the fixed "1.0.0.dev" prefix followed by the
# date (YYYYMMDD) at which this module is imported.
__version__ = f"1.0.0.dev{datetime.datetime.now():%Y%m%d}"
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import datetime
2+
import decimal
3+
import enum
4+
from typing import Dict, NewType, Type
5+
6+
7+
class BigQueryFieldModes(str, enum.Enum):
    """Field modes for a BigQuery schema column.

    Members mix in ``str``, so each one compares equal to its raw string value.
    """

    NULLABLE = "NULLABLE"
    REQUIRED = "REQUIRED"
    REPEATED = "REPEATED"
11+
12+
13+
class BigQueryTypes(str, enum.Enum):
    """BigQuery column type names.

    Several names share a value. Under ``enum.Enum`` the first name defined for
    a value is the canonical member and later names are aliases of it, so the
    definition order below is significant and must be preserved.
    """

    STRING = "STRING"
    BYTES = "BYTES"

    # Integer types: INTEGER is defined first, so INT64 is an alias of it.
    INTEGER = "INT64"
    INT64 = "INT64"

    # Floating point types: FLOAT is an alias of FLOAT64.
    FLOAT64 = "FLOAT64"
    FLOAT = "FLOAT64"

    NUMERIC = "NUMERIC"

    # Boolean types: BOOLEAN is an alias of BOOL.
    BOOL = "BOOL"
    BOOLEAN = "BOOL"

    # Nested types: RECORD is an alias of STRUCT.
    STRUCT = "STRUCT"
    RECORD = "STRUCT"

    # Date and time types.
    TIMESTAMP = "TIMESTAMP"
    DATE = "DATE"
    TIME = "TIME"
    DATETIME = "DATETIME"

    GEOGRAPHY = "GEOGRAPHY"
    JSON = "JSON"
31+
32+
33+
# Distinct static type (built on `str`) used as the Python-side type for
# BigQueryTypes.GEOGRAPHY in TypeMapping.
Geography = NewType("Geography", str)
34+
35+
36+
class TimeStamp(datetime.datetime):
    """``datetime.datetime`` subclass with no added behavior.

    It exists as a separate type so that TIMESTAMP and DATETIME can map to
    different Python types in ``TypeMapping``.
    """
38+
39+
40+
# Python type associated with each BigQuery type. Keys are the canonical enum
# members, so alias names (e.g. BigQueryTypes.INTEGER / BigQueryTypes.FLOAT)
# resolve to the same entries. STRUCT has no entry in this table.
TypeMapping: Dict[BigQueryTypes, Type] = {
    # Scalar types.
    BigQueryTypes.STRING: str,
    BigQueryTypes.BYTES: bytes,
    BigQueryTypes.INT64: int,
    BigQueryTypes.FLOAT64: float,
    BigQueryTypes.NUMERIC: decimal.Decimal,
    BigQueryTypes.BOOL: bool,
    # Date and time types.
    BigQueryTypes.TIMESTAMP: TimeStamp,
    BigQueryTypes.DATE: datetime.date,
    BigQueryTypes.TIME: datetime.time,
    BigQueryTypes.DATETIME: datetime.datetime,
    # Other types.
    BigQueryTypes.GEOGRAPHY: Geography,
    BigQueryTypes.JSON: dict,
}

benchmarks/benchmark_db_writer/bq_info_writer/__init__.py

Whitespace-only changes.
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
"""TODO: Update hardware info in the main function & run the script."""
2+
3+
import logging
4+
import os
5+
6+
from benchmarks.benchmark_db_writer import bq_writer_utils
7+
from benchmarks.benchmark_db_writer.schema.workload_benchmark_v2 import (
8+
hardware_info_schema,
9+
)
10+
11+
# Root logging setup for this script: INFO level, timestamped lines.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-8s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)
17+
18+
19+
def write_hardware_config(
    project,
    dataset,
    table,
    dataclass_type,
    hardware_id,
    gcp_accelerator_name,
    chip_name,
    bf_16_tflops,
    memory,
    hardware_type,
    provider_name,
    chips_per_node=None,
    update_person_ldap=os.getenv("USER", "mrv2"),
    description="",
    other="",
    host_memory=None,
    host_vcpus=None,
):
    """Insert one ``HardwareInfo`` row into the given BigQuery table.

    A writer is created for ``project``/``dataset``/``table`` using
    ``dataclass_type``; the table is first queried for ``hardware_id`` and the
    row is only written when that query returns nothing.

    Args:
      project: Project passed to the writer factory.
      dataset: Dataset passed to the writer factory.
      table: Table passed to the writer factory; also used in messages.
      dataclass_type: Row dataclass type passed to the writer factory.
      hardware_id: Id of the new row; must not already be in the table.
      gcp_accelerator_name, chip_name, bf_16_tflops, memory, hardware_type,
        provider_name, chips_per_node, update_person_ldap, description, other,
        host_memory, host_vcpus: Forwarded unchanged to ``HardwareInfo``.
        ``update_person_ldap`` defaults to the ``USER`` environment variable
        (read once, when this function is defined), falling back to "mrv2".

    Raises:
      ValueError: If the query for ``hardware_id`` returns a truthy result.
    """
    bq_writer = bq_writer_utils.create_bq_writer_object(
        project=project,
        dataset=dataset,
        table=table,
        dataclass_type=dataclass_type,
    )

    # Refuse to add a second row for an id that is already registered.
    existing_rows = bq_writer.query(where={"hardware_id": hardware_id})
    if existing_rows:
        raise ValueError("Hardware id %s is already present in the %s table" % (hardware_id, table))

    row_fields = {
        "hardware_id": hardware_id,
        "gcp_accelerator_name": gcp_accelerator_name,
        "chip_name": chip_name,
        "bf_16_tflops": bf_16_tflops,
        "memory": memory,
        "chips_per_node": chips_per_node,
        "hardware_type": hardware_type,
        "provider_name": provider_name,
        "update_person_ldap": update_person_ldap,
        "description": description,
        "other": other,
        "host_memory": host_memory,
        "host_vcpus": host_vcpus,
    }
    new_row = hardware_info_schema.HardwareInfo(**row_fields)

    logging.info("Writing Data %s to %s table.", new_row, table)
    bq_writer.write([new_row])
68+
69+
70+
if __name__ == "__main__":

    # Every destination table that should receive the new hardware row.
    table_configs = [
        {
            "project": "ml-workload-benchmarks",
            "dataset": "benchmark_dataset_v2",
            "table": "hardware_info",
        },
        {
            "project": "supercomputer-testing",
            "dataset": "mantaray_v2",
            "table": "hardware_info",
        },
    ]

    # Update it on every run
    hardware_spec = {
        "hardware_id": "a4",
        "gcp_accelerator_name": "A4",
        "chip_name": "B200",
        "bf_16_tflops": 2237,
        "memory": 180,
        "chips_per_node": 8,
        "hardware_type": "GPU",
        "provider_name": "Nvidia",
        "description": "",
    }

    # The keys of each table config match the project/dataset/table
    # parameters of write_hardware_config, so both dicts are unpacked directly.
    for destination in table_configs:
        write_hardware_config(
            dataclass_type=hardware_info_schema.HardwareInfo,
            **destination,
            **hardware_spec,
        )
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
"""TODO: Update the microbenchmark workload ids in the main function & run the script."""
2+
3+
import logging
4+
import os
5+
6+
from benchmarks.benchmark_db_writer import bq_writer_utils
7+
from benchmarks.benchmark_db_writer.schema.workload_benchmark_v2 import (
8+
microbenchmark_workload_info_schema,
9+
)
10+
11+
# Root logging setup for this script: INFO level, timestamped lines.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-8s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)
17+
18+
19+
def write_microbenchmark_workload_config(
    project,
    dataset,
    table,
    dataclass_type,
    workload_id,
    update_person_ldap=os.getenv("USER", "imo-eng"),
    description="",
):
    """Insert one ``MicrobenchmarkWorkloadInfo`` row into the given table.

    A writer is created for ``project``/``dataset``/``table`` using
    ``dataclass_type``; the table is first queried for ``workload_id`` and the
    row is only written when that query returns nothing.

    Args:
      project: Project passed to the writer factory.
      dataset: Dataset passed to the writer factory.
      table: Table passed to the writer factory; also used in messages.
      dataclass_type: Row dataclass type passed to the writer factory.
      workload_id: Id of the new row; must not already be in the table.
      update_person_ldap: Forwarded to the row. Defaults to the ``USER``
        environment variable (read once, when this function is defined),
        falling back to "imo-eng".
      description: Forwarded to the row.

    Raises:
      ValueError: If the query for ``workload_id`` returns a truthy result.
    """
    writer = bq_writer_utils.create_bq_writer_object(
        project=project,
        dataset=dataset,
        table=table,
        dataclass_type=dataclass_type,
    )

    microbenchmark_workload_info = writer.query(where={"workload_id": workload_id})
    if microbenchmark_workload_info:
        # Report the offending id, not the rows returned by the query.
        raise ValueError("Workload id %s is already present in the %s table" % (workload_id, table))

    workload_data = microbenchmark_workload_info_schema.MicrobenchmarkWorkloadInfo(
        workload_id=workload_id,
        update_person_ldap=update_person_ldap,
        description=description,
    )

    logging.info("Writing Data %s to %s table.", workload_data, table)
    writer.write([workload_data])
48+
49+
50+
def insert(workload_id, description=""):
    """Register ``workload_id`` in every configured microbenchmark workload table.

    Calls ``write_microbenchmark_workload_config`` once per destination table,
    in the order listed below.

    Args:
      workload_id: Id of the workload to register; must not be None.
      description: Description stored with the workload row.

    Raises:
      ValueError: If ``workload_id`` is None. The per-table writer also raises
        ValueError when the id is already present in a table.
    """
    # Explicit check rather than `assert`, which is removed under `python -O`.
    if workload_id is None:
        raise ValueError("workload_id must not be None")

    table_configs = [
        {
            "project": "ml-workload-benchmarks",
            "dataset": "benchmark_dataset_v2",
            "table": "microbenchmark_workload_info",
        },
        {
            "project": "supercomputer-testing",
            "dataset": "mantaray_v2",
            "table": "microbenchmark_workload_info",
        },
    ]

    for table_config in table_configs:
        write_microbenchmark_workload_config(
            project=table_config["project"],
            dataset=table_config["dataset"],
            table=table_config["table"],
            dataclass_type=microbenchmark_workload_info_schema.MicrobenchmarkWorkloadInfo,
            workload_id=workload_id,
            description=description,
        )
75+
76+
77+
if __name__ == "__main__":

    # Fill in the workload ids to register before running the script, e.g.:
    # workloads = ["all_gather", "ppermute", "psum", "psum_scatter"]
    workloads = []
    for workload_name in workloads:
        insert(workload_name, "")
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
"""TODO: Update model info in the main function & run the script."""
2+
3+
import logging
4+
import os
5+
6+
from benchmarks.benchmark_db_writer import bq_writer_utils
7+
from benchmarks.benchmark_db_writer.schema.workload_benchmark_v2 import model_info_schema
8+
9+
# Root logging setup for this script: INFO level, timestamped lines.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-8s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)
15+
16+
17+
def write_model_config(
    project,
    dataset,
    table,
    dataclass_type,
    model_id,
    name,
    variant,
    parameter_size_in_billions,
    update_person_ldap=os.getenv("USER", "mrv2"),
    description="",
    details="",
):
    """Insert one ``ModelInfo`` row into the given BigQuery table.

    Two duplicate checks run before the write: one on ``model_id`` and one on
    the combination of ``name``, ``variant`` and
    ``parameter_size_in_billions``. The row is written only if both queries
    return nothing.

    Args:
      project: Project passed to the writer factory.
      dataset: Dataset passed to the writer factory.
      table: Table passed to the writer factory; also used in messages.
      dataclass_type: Row dataclass type passed to the writer factory.
      model_id: Id of the new row; must not already be in the table.
      name, variant, parameter_size_in_billions: Forwarded to the row; the
        combination must not already be in the table.
      update_person_ldap: Forwarded to the row. Defaults to the ``USER``
        environment variable (read once, when this function is defined),
        falling back to "mrv2".
      description, details: Forwarded to the row.

    Raises:
      ValueError: If either duplicate query returns a truthy result.
    """
    bq_writer = bq_writer_utils.create_bq_writer_object(
        project=project,
        dataset=dataset,
        table=table,
        dataclass_type=dataclass_type,
    )

    # First duplicate check: the id itself.
    rows_with_same_id = bq_writer.query(where={"model_id": model_id})
    if rows_with_same_id:
        raise ValueError("Model id %s is already present in the %s table" % (model_id, table))

    # Second duplicate check: same model registered under a different id.
    identity_filter = {
        "name": name,
        "variant": variant,
        "parameter_size_in_billions": parameter_size_in_billions,
    }
    rows_with_same_identity = bq_writer.query(where=identity_filter)
    if rows_with_same_identity:
        raise ValueError(
            "Model with name %s, variant %s and "
            "parameter size %s is already present in the %s "
            "table" % (name, variant, parameter_size_in_billions, table)
        )

    row_fields = {
        "model_id": model_id,
        "name": name,
        "variant": variant,
        "parameter_size_in_billions": parameter_size_in_billions,
        "update_person_ldap": update_person_ldap,
        "description": description,
        "details": details,
    }
    new_row = model_info_schema.ModelInfo(**row_fields)

    logging.info("Writing Data %s to %s table.", new_row, table)
    bq_writer.write([new_row])
70+
71+
72+
if __name__ == "__main__":

    # Every destination table that should receive the new model row.
    table_configs = [
        {
            "project": "ml-workload-benchmarks",
            "dataset": "benchmark_dataset_v2",
            "table": "model_info",
        },
        {
            "project": "supercomputer-testing",
            "dataset": "mantaray_v2",
            "table": "model_info",
        },
    ]

    # Update it on every run
    model_spec = {
        "model_id": "mistral-7b",
        "name": "Mistral",
        "variant": "7B",
        "parameter_size_in_billions": 7,
        "description": "https://huggingface.co/mistralai/Mistral-7B-v0.3",
    }

    # The keys of each table config match the project/dataset/table
    # parameters of write_model_config, so both dicts are unpacked directly.
    for destination in table_configs:
        write_model_config(
            dataclass_type=model_info_schema.ModelInfo,
            **destination,
            **model_spec,
        )

0 commit comments

Comments
 (0)