"""Train a small MNIST classifier while reporting the run to Kiroframe arcee."""
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

import kiroframe_arcee as arcee

# Placeholder only: substitute your own profiling token before running.
# Real tokens and private endpoints must not be committed with the example.
PROFILING_TOKEN = "YOUR-PROFILING-TOKEN"


class NeuralNet(nn.Module):
    """Fully connected classifier with a single hidden ReLU layer."""

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Flatten every sample in the batch and return the class logits."""
        x = x.view(-1, self.fc1.in_features)  # flatten
        x = self.fc1(x)
        x = self.relu(x)
        return self.fc2(x)


def main():
    """Train and evaluate the demo network inside an arcee session.

    Tags, milestones and metrics are reported through ``kiroframe_arcee``
    while the ``arcee.init`` context is open.
    """
    with arcee.init(PROFILING_TOKEN, task_key="test_task"):
        arcee.tag("project", "nnt_model demo")
        arcee.tag("model_type", "trivial")

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Hyperparameters
        input_size = 784  # 28 * 28 pixels per image, flattened
        hidden_size = 128
        num_classes = 10
        num_epochs = 5
        batch_size = 64
        learning_rate = 0.001

        # Dataset and DataLoader (MNIST-specific normalization)
        transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,)),
            ]
        )

        print("Downloading datasets..")
        arcee.milestone("Download train data")
        train_dataset = datasets.MNIST(
            root="data", train=True, transform=transform, download=True
        )
        arcee.milestone("Download test data")
        test_dataset = datasets.MNIST(
            root="data", train=False, transform=transform, download=True
        )

        train_loader = DataLoader(
            dataset=train_dataset, batch_size=batch_size, shuffle=True
        )
        test_loader = DataLoader(
            dataset=test_dataset, batch_size=batch_size, shuffle=False
        )

        model = NeuralNet(input_size, hidden_size, num_classes).to(device)

        # Loss and Optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        # Training
        print("Training..")
        arcee.milestone("train")
        for epoch in range(num_epochs):
            model.train()
            total_loss = 0
            correct = 0
            total = 0

            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                total_loss += loss.item()
                predicted = outputs.argmax(dim=1)
                correct += (predicted == labels).sum().item()
                total += labels.size(0)

            train_acc = 100 * correct / total
            avg_loss = total_loss / len(train_loader)

            # Report each metric once, as a number. Re-sending "loss" and
            # "epoch" as formatted strings would replace usable values with
            # display text (and label the loss as a percentage).
            arcee.send(
                {"accuracy": train_acc, "loss": avg_loss, "epoch": epoch + 1}
            )
            print(
                f"Epoch [{epoch + 1}/{num_epochs}], "
                f"Loss: {avg_loss:.4f}, Accuracy: {train_acc:.2f}%"
            )

        # Testing
        print("Testing..")
        arcee.milestone("test")
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                predicted = model(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        test_acc = 100 * correct / total
        arcee.send({"test_accuracy": test_acc})
        print(f"Test Accuracy: {test_acc:.2f}%")


if __name__ == "__main__":
    main()
# ``Collector`` methods from kiroframe_arcee/collectors/hardware.py, written
# dedented; indent one level when placing them inside the class body.


@staticmethod
async def _io(interval: float = 1):
    """Measure system-wide disk and network throughput.

    Reads the cumulative psutil I/O counters, sleeps for ``interval``
    seconds without blocking the event loop, reads the counters again and
    converts the difference into a per-second rate using the module-level
    ``BYTES_IN_KiB`` constant.

    Args:
        interval: Length of the sampling window in seconds.

    Returns:
        A ``(disk_read, disk_write, net_sent, net_recv)`` tuple in KiB/s,
        each value rounded to two decimals.
    """
    def _snapshot():
        disk = psutil.disk_io_counters()
        net = psutil.net_io_counters()
        # psutil returns None instead of a counters tuple when the host
        # exposes no disks / no network interfaces; treat that as "no
        # traffic" rather than failing the whole stats collection.
        return (
            disk.read_bytes if disk is not None else 0,
            disk.write_bytes if disk is not None else 0,
            net.bytes_sent if net is not None else 0,
            net.bytes_recv if net is not None else 0,
        )

    clock = asyncio.get_running_loop().time
    before = _snapshot()
    started = clock()
    await asyncio.sleep(interval)
    after = _snapshot()
    # Divide by the measured window, not the requested one: the values are
    # reported as KiB/s and the sleep may run longer than ``interval``.
    elapsed = clock() - started
    if elapsed <= 0:
        return 0.0, 0.0, 0.0, 0.0
    return tuple(
        # A counter that was reset between the two reads must not produce
        # a negative rate.
        round(max(new - old, 0) / BYTES_IN_KiB / elapsed, 2)
        for old, new in zip(before, after)
    )


@classmethod
async def collect_stats(cls):
    """Collect the hardware statistics together with I/O throughput.

    Returns:
        A dict holding an ``io_stats`` entry (disk/network rates in KiB/s)
        updated with everything returned by ``cls._collect_stats``.
    """
    # Run the I/O sampling window concurrently with the executor-backed
    # collection so the sleep inside ``_io`` does not add to the total
    # collection time.
    io_rates, ps_stats = await asyncio.gather(
        cls._io(),
        run_async(cls._collect_stats, executor=cls.executor),
    )
    disk_read, disk_write, net_sent, net_recv = io_rates
    result = {
        # IO stats in KiB/s
        "io_stats": {
            "disk_read": disk_read,
            "disk_write": disk_write,
            "net_sent": net_sent,
            "net_recv": net_recv,
        }
    }
    result.update(ps_stats)
    return result
a/examples/nnt_model.py +++ b/examples/nnt_model.py @@ -10,10 +10,8 @@ def main(): # init arcee with arcee.init( - "2f4fda2d-755a-45a1-95d9-d9406606af1e", + "YOU-PROFILING_TOKEN", task_key="test_task", - endpoint_url="https://10.10.10.10:443/arcee/v2", - ssl=False, ): arcee.tag("project", "nnt_model demo") arcee.tag("model_type", "trivial") diff --git a/setup.cfg b/setup.cfg index 53f3ba3..44642c4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,7 @@ # setup.cfg [metadata] name = kiroframe_arcee -version = 0.1.50 +version = 0.1.51 author = Hystax description = ML profiling tool for Kiroframe long_description = file: README.md