Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions python/ray/dashboard/modules/reporter/reporter_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -1312,8 +1312,8 @@ def generate_worker_stats_record(self, worker_stats: List[dict]) -> List[Record]

for stat in worker_stats:
cmdline = stat.get("cmdline")
# All ray processes start with ray::
if cmdline and len(cmdline) > 0 and cmdline[0].startswith("ray::"):
# collect both worker and driver stats
if cmdline:
proc_name = cmdline[0]
proc_name_to_stats[proc_name].append(stat)

Expand All @@ -1323,9 +1323,6 @@ def generate_worker_stats_record(self, worker_stats: List[dict]) -> List[Record]
or stat.get("gpu_utilization", 0) > 0
):
gpu_worker_proc_names.add(proc_name)
# We will lose worker stats that don't follow the ray worker proc
# naming convention. Theoretically, there should be no data loss here
# because all worker processes are renamed to ray::.

records = []

Expand Down
31 changes: 0 additions & 31 deletions python/ray/dashboard/modules/reporter/tests/test_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -844,37 +844,6 @@ def verify_metrics_values(
0,
)

"""
Verify worker names are only reported when they start with ray::.
"""
# Verify if the command doesn't start with ray::, metrics are not reported.
unknown_stats = {
"memory_info": Bunch(rss=55934976, vms=7026937856, pfaults=15354, pageins=0),
"memory_full_info": Bunch(
uss=51428381, rss=55934976, vms=7026937856, pfaults=15354, pageins=0
),
"cpu_percent": 6.0,
"num_fds": 8,
"cmdline": ["python mock", "", "", "", "", "", "", "", "", "", "", ""],
"create_time": 1614826391.338613,
"pid": 7175,
"cpu_times": Bunch(
user=0.607899328,
system=0.274044032,
children_user=0.0,
children_system=0.0,
),
}
test_stats["workers"] = [idle_stats, unknown_stats]

records = agent._to_records(test_stats, cluster_stats)
uss_records, cpu_records, num_fds_records = get_uss_and_cpu_and_num_fds_records(
records
)
assert "python mock" not in uss_records
assert "python mock" not in cpu_records
assert "python mock" not in num_fds_records

stats_payload = agent._generate_stats_payload(test_stats)
assert stats_payload is not None
assert isinstance(stats_payload, str)
Expand Down
3 changes: 2 additions & 1 deletion python/ray/tests/test_metrics_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ def verify_node_metrics():
samples = avail_metrics[metric]
for sample in samples:
components.add(sample.labels["Component"])
assert components == {"gcs", "raylet", "agent", "ray::IDLE"}
assert components == {"gcs", "raylet", "agent", "ray::IDLE", sys.executable}

avail_metrics = set(avail_metrics)

Expand Down Expand Up @@ -886,6 +886,7 @@ def verify_components():
components.add(sample.labels["Component"])
print(components)
assert {
sys.executable, # driver process
"raylet",
"agent",
"ray::Actor",
Expand Down