Skip to content

Commit fa226e7

Browse files
committed
feat: add torch Evaluation and data fetch events to the UI
1 parent 93d1b85 commit fa226e7

File tree

3 files changed

+94
-18
lines changed

3 files changed

+94
-18
lines changed

plugins/plugin-codeflare/src/components/Grid.tsx

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,12 @@ ${status ? "Status: " + status : ""}
4545
private readonly cell = (event: GenericEvent, idx: number) => {
4646
return (
4747
<Tooltip key={idx} markdown={this.tooltipContent(event)}>
48-
<span className="kui--grid-cell" data-tag="badge" data-type={event.type} data-state={event.state}>
48+
<span
49+
className="kui--grid-cell"
50+
data-tag="badge"
51+
data-type={event.type.replace(/\s/g, "")}
52+
data-state={event.state}
53+
>
4954
<span data-tag="badge-circle"></span>
5055
</span>
5156
</Tooltip>

plugins/plugin-codeflare/src/controller/events/torch.ts

Lines changed: 56 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import Event from "./Event"
1818

19-
type EventType = "Epoch" | "Iteration" | "Marker"
19+
type EventType = "Data Fetch" | "Data Uncompress" | "Evaluation" | "EvaluationStep" | "Epoch" | "Iteration" | "Marker"
2020
type Detail = { epoch: number; step: number; nSteps: number; ip: string }
2121
export type TorchEvent = Event<EventType, Detail>
2222

@@ -77,26 +77,65 @@ export function collateEvent(M: TorchEvent[], line: string) {
7777
return M
7878
}
7979

80-
const match = line.match(/ip=([\d.]+)\)\s+(Epoch|Iteration):\s+(\d+)%\|[^|]+\|\s(\d+)\/(\d+)/)
80+
// Data fetch/uncompress events
81+
const hackMatch = line.match(/ip=([\d.]+)\)\s+(\d+-\d+-\d+\s+\d+:\d+:\d+)\s+(getting data|unpacking)/)
82+
if (hackMatch) {
83+
const ip = hackMatch[1]
84+
const timestamp = new Date(hackMatch[2]).getTime()
85+
const name = `Torch Training on ${ip}`
86+
const type: EventType = hackMatch[3] === "unpacking" ? "Data Uncompress" : "Data Fetch"
87+
M.push(new TorchEventImpl(name, ip, type, 1, 1, 1, timestamp, "Done", line.slice(line.indexOf(hackMatch[3]))))
88+
}
89+
90+
// Torch Events
91+
const match = line.match(/ip=([\d.]+)\)\s+(Evaluation|Epoch|Iteration):\s+(\d+)%\|[^|]+\|\s(\d+)\/(\d+)/)
8192
if (match) {
8293
const ip = match[1]
8394
const type = match[2] as EventType
84-
// const percentage = parseInt(match[3], 10)
85-
const step = parseInt(match[4], 10) - (type === "Epoch" ? 0 : 1)
8695
const nSteps = parseInt(match[5], 10)
96+
const name = `Torch Training on ${ip}`
97+
98+
// Why the conditional subtraction below: Iteration markers are post,
99+
// i.e. emitted upon completion, whereas Evaluation and Epoch are
100+
// pre, i.e. emitted upon commencement
101+
const step = parseInt(match[4], 10) - (type === "Iteration" ? 1 : 0)
87102

88103
const epoch =
89-
type === "Epoch"
104+
type === "Evaluation"
105+
? { step: -1, nSteps: -1, state: "InProgress" }
106+
: type === "Epoch"
90107
? { step, nSteps, state: "InProgress" }
91108
: findEpoch(M, ip) || { step: -1, nSteps: 0, state: "InProgress" }
92-
const name = `Torch Training on ${ip}`
93109
const timestampMarker = findPrevious(M, ip, "Marker", "Done")
94110
const timestamp = timestampMarker ? timestampMarker.timestamp : Date.now()
95111

96-
if (type === "Iteration") {
112+
if (type === "Evaluation") {
113+
if (step === 0) {
114+
M.push(new TorchEventImpl(name, ip, "Evaluation", step, nSteps, epoch.step, timestamp))
115+
for (let idx = 1; idx < nSteps; idx++) {
116+
// prefill
117+
M.push(new TorchEventImpl(name, ip, "EvaluationStep", idx, nSteps, epoch.step, timestamp, "Pending"))
118+
}
119+
} else {
120+
for (let idx = 1; idx <= step; idx++) {
121+
const priorEvaluationStep = findPrevious(M, ip, "EvaluationStep", "Pending", idx, epoch.step)
122+
if (priorEvaluationStep) {
123+
priorEvaluationStep.state = "Done"
124+
}
125+
}
126+
127+
if (step === nSteps) {
128+
const priorEvaluation = findPrevious(M, ip, "Evaluation", "InProgress", 0, epoch.step)
129+
if (priorEvaluation) {
130+
priorEvaluation.state = "Done"
131+
}
132+
}
133+
}
134+
return M
135+
} else if (type === "Iteration") {
97136
epoch.state = "InProgress"
98137
} else if (step > 0) {
99-
const thisEpoch = findEpoch(M, ip, "Pending", step)
138+
const thisEpoch = findPrevious(M, ip, type, "Pending", step)
100139
if (thisEpoch) {
101140
thisEpoch.state = "InProgress"
102141
}
@@ -145,7 +184,15 @@ export function collateEvent(M: TorchEvent[], line: string) {
145184
}
146185

147186
/**
 * Comparator over two TorchEvents (presumably handed to Array.prototype.sort
 * further down the file -- the call site is not visible here).
 *
 * As added by this change, the ordering keys are applied in sequence:
 *   1. `ip`, via localeCompare
 *   2. whether `type` starts with "Evaluation" (events that do not match sort first)
 *   3. `epoch`, ascending
 *   4. `step`, ascending
 *   5. `type`, via localeCompare, as the final tie-breaker
 *
 * @return a negative number, zero, or a positive number following the usual
 *   comparator convention
 */
function sortFn(a: TorchEvent, b: TorchEvent) {
148-
return a.ip.localeCompare(b.ip) || a.epoch - b.epoch || a.step - b.step || a.type.localeCompare(b.type)
187+
// 1 for "Evaluation"/"EvaluationStep" (anything prefixed with Evaluation), 0 otherwise
const aIsEval = /^Evaluation/.test(a.type) ? 1 : 0
188+
const bIsEval = /^Evaluation/.test(b.type) ? 1 : 0
189+
return (
190+
a.ip.localeCompare(b.ip) ||
191+
// within one ip, 0 - 1 < 0 places non-Evaluation events ahead of Evaluation ones
aIsEval - bIsEval ||
192+
a.epoch - b.epoch ||
193+
a.step - b.step ||
194+
a.type.localeCompare(b.type)
195+
)
149196
}
150197

151198
/** @return lifecycle events (Epoch, Iteration) for Torch training */

plugins/plugin-codeflare/web/scss/components/Dashboard/Grid.scss

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,13 @@ $fullWidth: 1em; /* $large * ($unit + $rgap) - $rgap */
7070
}
7171
}
7272

73+
@mixin StepUI {
74+
@include CFCellContent {
75+
height: 80%;
76+
filter: saturate(0.5) brightness(0.85);
77+
}
78+
}
79+
7380
@include CFGrid {
7481
padding: $xlarge + px 1em;
7582
grid-row-gap: $rgap;
@@ -119,18 +126,35 @@ $fullWidth: 1em; /* $large * ($unit + $rgap) - $rgap */
119126

120127
@include CFCell(Iteration) {
121128
grid-column: span $tiny;
122-
@include CFCellContent {
123-
background-color: var(--color-base0D);
124-
filter: saturate(0.5) brightness(0.85);
125-
height: 80%;
126-
}
129+
@include Color(var(--color-base0D));
130+
@include StepUI;
127131
}
128132

129133
@include CFCell(Epoch) {
130134
grid-column: span $small;
131-
@include CFCellContent {
132-
background-color: var(--color-base0D);
133-
}
135+
@include Color(var(--color-base0D));
136+
}
137+
138+
@include CFCell(Evaluation) {
139+
grid-column: span $small;
140+
@include Color(var(--color-base0F));
141+
}
142+
143+
@include CFCell(EvaluationStep) {
144+
grid-column: span $tiny;
145+
@include Color(var(--color-base0F));
146+
@include StepUI;
147+
}
148+
149+
@include CFCell(DataFetch) {
150+
grid-column: span $small;
151+
@include Color(var(--color-base0E));
152+
}
153+
154+
@include CFCell(DataUncompress) {
155+
grid-column: span $small;
156+
@include StepUI;
157+
@include Color(var(--color-base0E));
134158
}
135159
}
136160

0 commit comments

Comments
 (0)