Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 130 additions & 0 deletions examples/nnt_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

import kiroframe_arcee as arcee


def main():
    """Train and evaluate a small MNIST classifier while reporting to arcee.

    The whole run happens inside the ``arcee.init`` context: the run is
    tagged, a milestone is recorded before each phase (dataset downloads,
    training, testing), per-epoch training metrics are sent after every
    epoch, and the final test accuracy is sent once at the end.
    """
    # init arcee
    # NOTE(review): the first argument looks like a placeholder that must be
    # replaced with a real profiling token before running -- confirm.
    with arcee.init(
        "YOU-PROFILING_TOKEN",
        task_key="test_task",
    ):
        arcee.tag("project", "nnt_model demo")
        arcee.tag("model_type", "trivial")

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Hyperparameters
        input_size = 784  # 28 * 28 pixels per flattened MNIST image
        hidden_size = 128
        num_classes = 10
        num_epochs = 5
        batch_size = 64
        learning_rate = 0.001

        # Dataset and DataLoader
        transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,)),
                # MNIST-spec normalization
            ]
        )

        print("Downloading datasets..")
        arcee.milestone("Download train data")
        train_dataset = datasets.MNIST(
            root="data", train=True, transform=transform, download=True
        )
        arcee.milestone("Download test data")
        test_dataset = datasets.MNIST(
            root="data", train=False, transform=transform, download=True
        )

        train_loader = DataLoader(
            dataset=train_dataset, batch_size=batch_size, shuffle=True
        )
        test_loader = DataLoader(
            dataset=test_dataset, batch_size=batch_size, shuffle=False
        )

        # Neural Net
        class NeuralNet(nn.Module):
            """Two-layer fully connected classifier (Linear-ReLU-Linear)."""

            def __init__(self):
                super().__init__()
                self.fc1 = nn.Linear(input_size, hidden_size)
                self.relu = nn.ReLU()
                self.fc2 = nn.Linear(hidden_size, num_classes)

            def forward(self, x):
                # Flatten each image to a vector matching fc1's input width.
                x = x.view(-1, input_size)
                x = self.fc1(x)
                x = self.relu(x)
                x = self.fc2(x)
                return x

        model = NeuralNet().to(device)

        # Loss and Optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        # Training
        print("Training..")
        arcee.milestone("train")
        for epoch in range(num_epochs):
            model.train()
            total_loss = 0
            correct = 0
            total = 0

            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                total_loss += loss.item()
                # Index of the largest logit is the predicted class.
                _, predicted = torch.max(outputs, 1)
                correct += (predicted == labels).sum().item()
                total += labels.size(0)

            train_acc = 100 * correct / total
            # Mean of the per-batch losses for this epoch.
            avg_loss = total_loss / len(train_loader)

            arcee.send({"accuracy": train_acc,
                        "loss": avg_loss, "epoch": epoch + 1})
            # NOTE(review): "loss" and "epoch" are sent again below as
            # formatted strings (the loss even carries a "%" suffix although
            # it is not a percentage). Presumably this demonstrates
            # string-valued metrics -- confirm it is intended.
            arcee.send({"loss": f"{avg_loss:.2f}%"})
            arcee.send({"epoch": f"[{epoch + 1}/{num_epochs}]"})
            print(
                f"Epoch [{epoch + 1}/{num_epochs}], "
                f"Loss: {avg_loss:.4f}, Accuracy: {train_acc:.2f}%"
            )

        # Testing
        print("Testing..")
        arcee.milestone("test")
        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                # Gradients are already disabled here, so the legacy
                # ``.data`` accessor is unnecessary; this now matches the
                # training loop.
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Compute once so the sent metric and the printed value cannot drift.
        test_acc = 100 * correct / total
        arcee.send({"test_accuracy": test_acc})
        print(f"Test Accuracy: {test_acc:.2f}%")


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
42 changes: 41 additions & 1 deletion kiroframe_arcee/collectors/hardware.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import asyncio
import math
import os
import concurrent.futures
Expand All @@ -13,6 +14,8 @@
# 1 - 1/N for N logical CPUs, i.e. a value in [0, 1).
# os.cpu_count() returns None when the count cannot be determined; fall back
# to a single CPU so importing this module does not raise TypeError.
_MEASURE_TIME = 1 - 1 / (os.cpu_count() or 1)
# Two values slightly above _MEASURE_TIME (+0.05 and +0.01).
_TIME_INTERVALS = (_MEASURE_TIME + 0.05, _MEASURE_TIME + 0.01)

# Bytes per kibibyte; used to convert raw byte counters to KiB.
BYTES_IN_KiB = 1024


class Collector:
executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
Expand Down Expand Up @@ -112,6 +115,43 @@ def _collect_stats(cls):
"proc": ps_info,
}

@staticmethod
async def _io(interval: float = 1):
    """Sample system-wide disk and network throughput.

    Two snapshots of psutil's cumulative I/O byte counters are taken
    ``interval`` seconds apart; the differences are converted to KiB and
    divided by the time that actually elapsed between the snapshots.

    :param interval: seconds to sleep between the two snapshots.
    :return: tuple ``(disk_read, disk_write, net_sent, net_recv)`` in
        KiB/s, each rounded to two decimals.
    """
    clock = asyncio.get_running_loop().time

    def _get_io():
        # psutil returns None instead of a counters tuple when the host
        # exposes no disks / network interfaces; count those as zero
        # rather than failing with AttributeError.
        disk = psutil.disk_io_counters()
        net = psutil.net_io_counters()
        return {
            "disk_read": getattr(disk, "read_bytes", 0),
            "disk_write": getattr(disk, "write_bytes", 0),
            "net_sent": getattr(net, "bytes_sent", 0),
            "net_recv": getattr(net, "bytes_recv", 0),
        }

    started = clock()
    before = _get_io()
    await asyncio.sleep(interval)
    after = _get_io()
    # Use the measured window, not the requested one: the sleep can overrun
    # and callers may pass an interval other than one second.
    elapsed = clock() - started

    def _rate(key):
        if elapsed <= 0:
            return 0.0
        # A counter reset (e.g. a device disappearing) would produce a
        # negative delta; a negative throughput is meaningless.
        delta = max(after[key] - before[key], 0)
        return round(delta / BYTES_IN_KiB / elapsed, 2)

    return (
        _rate("disk_read"),
        _rate("disk_write"),
        _rate("net_sent"),
        _rate("net_recv"),
    )

@classmethod
async def collect_stats(cls):
    """Collect hardware statistics together with I/O throughput.

    The I/O sampling (``cls._io``) and the executor-backed
    ``cls._collect_stats`` call run concurrently, so their measurement
    windows overlap instead of adding up.

    :return: dict holding an ``"io_stats"`` entry (disk/network rates in
        KiB/s) updated with everything returned by ``_collect_stats``.
    """
    io_rates, ps_stats = await asyncio.gather(
        cls._io(),
        run_async(cls._collect_stats, executor=cls.executor),
    )
    disk_read, disk_write, net_sent, net_recv = io_rates
    result = {
        # IO stats in KiB/s
        "io_stats": {
            "disk_read": disk_read,
            "disk_write": disk_write,
            "net_sent": net_sent,
            "net_recv": net_recv,
        }
    }
    # Same merge order as before: keys from _collect_stats win on conflict.
    result.update(ps_stats)
    return result
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# setup.cfg
[metadata]
name = kiroframe_arcee
version = 0.1.50
version = 0.1.51
author = Hystax
description = ML profiling tool for Kiroframe
long_description = file: README.md
Expand Down