Skip to content

Commit ef7a819

Browse files
committed
fix: improved UX for attaching to running jobs
Instead of showing nothing for an extended period of time, we now open a window immediately and show some (for now textual) progress info. This PR also fixes some issues that could result in bogus output (`cannot read property runtime_env of undefined`) in job description.
1 parent 61eeda2 commit ef7a819

File tree

6 files changed

+98
-27
lines changed

6 files changed

+98
-27
lines changed

plugins/plugin-codeflare/src/controller/attach.ts

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
* limitations under the License.
1515
*/
1616

17+
import Debug from "debug"
1718
import { MadWizardOptions } from "madwizard"
1819
import { Arguments, ParsedOptions } from "@kui-shell/core"
1920

@@ -31,7 +32,14 @@ type Options = ParsedOptions &
3132
* @return the path to the captured log directory and a cleanup
3233
* handler to be invoked when you are done consuming the output
3334
*/
34-
export async function attach(profile: string, jobId: boolean | string, opts: MadWizardOptions = {}) {
35+
export async function attach(
36+
profile: string,
37+
jobId: boolean | string,
38+
opts: MadWizardOptions = {},
39+
stdout: (line: string) => void
40+
) {
41+
const debug = Debug("plugin-codeflare/attach")
42+
3543
const deployGuidebook = "ml/ray/aggregator/in-cluster/client-side/deploy"
3644
const startGuidebook =
3745
typeof jobId === "string"
@@ -56,14 +64,23 @@ export async function attach(profile: string, jobId: boolean | string, opts: Mad
5664
opts
5765
)
5866

67+
const { cli } = await import("madwizard/dist/fe/cli")
68+
5969
// *deploy* the aggregator pod, then *start* an aggregator instance
6070
// for the given jobId
61-
const { cli } = await import("madwizard/dist/fe/cli")
62-
await cli(
63-
["codeflare", "guide", deployGuidebook],
64-
undefined,
65-
Object.assign({}, options, { name: "log-aggregator-deploy" })
66-
)
71+
try {
72+
const deployOptions = Object.assign({}, options, { name: "log-aggregator-deploy" })
73+
debug("Deploying log aggregator", deployGuidebook, deployOptions)
74+
stdout("Deploying log aggregator...\n")
75+
await cli(["codeflare", "guide", deployGuidebook], undefined, deployOptions)
76+
debug("deploying log aggregator: done")
77+
} catch (err) {
78+
console.error("Error attaching", err)
79+
throw err
80+
}
81+
82+
debug("attaching to", jobId)
83+
stdout("Attaching to job...\n")
6784
const resp = await cli(
6885
["codeflare", "guide", startGuidebook],
6986
undefined,
@@ -72,6 +89,9 @@ export async function attach(profile: string, jobId: boolean | string, opts: Mad
7289

7390
// the logdir that we captured
7491
const logdir = resp && resp.env ? resp.env.LOGDIR_STAGE : undefined
92+
if (logdir) {
93+
debug("successfully attached to", jobId, logdir)
94+
}
7595

7696
return {
7797
logdir,
@@ -89,12 +109,16 @@ export default async function attachCommand(args: Arguments<Options>) {
89109
const jobId = args.parsedOptions.a
90110
const profile = args.parsedOptions.p || (await Profiles.lastUsed())
91111

92-
const resp = await attach(profile, jobId, { verbose: args.parsedOptions.V })
112+
const stdout = await args.createOutputStream()
113+
const { logdir, cleanExit } = await attach(profile, jobId, { verbose: args.parsedOptions.V }, stdout)
114+
115+
if (logdir) {
116+
if (cleanExit) {
117+
window.addEventListener("beforeunload", () => cleanExit())
118+
}
93119

94-
if (resp.logdir) {
95-
process.env.LOGDIR = resp.logdir
96-
await args.REPL.qexec(`codeflare dashboardui -f ${encodeComponent(resp.logdir)}`)
97-
// TODO utilize resp.cleanExit?
120+
stdout("Opening Dashboard\n")
121+
return args.REPL.qexec(`codeflare dashboardui -f ${encodeComponent(logdir)}`)
98122
} else {
99123
throw new Error("Could not attach, due to missing logging directory")
100124
}

plugins/plugin-codeflare/src/controller/description.ts

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,24 @@
1616

1717
import { join } from "path"
1818
import { Arguments, Registrar, encodeComponent } from "@kui-shell/core"
19+
20+
import Status from "./status"
1921
import { expand } from "../lib/util"
2022

2123
type Item = { label: string; value: string }
2224

2325
export type Summary = Item[]
2426

25-
export type SummaryResponse = {
27+
type SummaryResponse = {
2628
jobid: string
29+
status: Status
30+
start_time: number
31+
end_time: number
2732
cmdline: {
2833
appPart: string
2934
systemPart: string
3035
}
31-
runtimeEnv: {
36+
runtime_env: {
3237
env_vars: {
3338
KUBE_CONTEXT: string
3439
KUBE_NS: string
@@ -48,11 +53,29 @@ function jobDefinition(filepath: string) {
4853
return encodeComponent(expand(join(filepath.replace(/'/g, ""), "ray-job-definition.json")))
4954
}
5055

51-
export async function getJobDefinition(runDir: string, REPL: Arguments["REPL"]) {
56+
export async function getJobDefinition(runDir: string, REPL: Arguments["REPL"], inRetry = 0): Promise<SummaryResponse> {
5257
try {
53-
return JSON.parse(await REPL.qexec<string>(`vfs fslice ${encodeComponent(jobDefinition(runDir))} 0`))
58+
const data = await REPL.qexec<string>(`vfs fslice ${jobDefinition(runDir)} 0`)
59+
try {
60+
return JSON.parse(data)
61+
} catch (err) {
62+
if (inRetry < 10 && typeof data === "string" && data.length === 0) {
63+
// some race condition; the vfs might not be ready; FIXME!!! hack here:
64+
await new Promise((resolve) => setTimeout(resolve, 100))
65+
return getJobDefinition(runDir, REPL, inRetry + 1)
66+
} else {
67+
console.error(
68+
"Error parsing job definition",
69+
jobDefinition(runDir),
70+
typeof data === "string" ? data.length : -1,
71+
data
72+
)
73+
throw err
74+
}
75+
}
5476
} catch (err) {
55-
console.error(runDir, err)
77+
console.error("Error loading job definition", runDir, err)
78+
throw err
5679
}
5780
}
5881

@@ -63,6 +86,10 @@ async function app(args: Arguments) {
6386
}
6487

6588
const jobInfo = await getJobDefinition(filepath, args.REPL)
89+
if (!jobInfo) {
90+
return "Error fetching job information"
91+
}
92+
6693
const RAY_IMAGE = jobInfo.runtime_env.env_vars ? jobInfo.runtime_env.env_vars.RAY_IMAGE : "Unknown"
6794

6895
const status = jobInfo.status.toLowerCase()

plugins/plugin-codeflare/src/controller/run/get.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import { setTabReadonly } from "@kui-shell/plugin-madwizard"
2323

2424
import { productName } from "@kui-shell/client/config.d/name.json"
2525

26+
import Status from "../status"
2627
import { width, height } from "../dashboard"
2728
import { getJobDefinition } from "../description"
2829

@@ -48,8 +49,6 @@ function capitalize(str: string) {
4849
return str[0].toUpperCase() + str.slice(1)
4950
}
5051

51-
/** TODO: these are specific to Ray */
52-
type Status = "SUCCEEDED" | "STOPPED" | "RUNNING" | "PENDING" | "ERROR"
5352
function statusColor(status: Status) {
5453
switch (status) {
5554
case "SUCCEEDED":
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/*
2+
* Copyright 2022 The Kubernetes Authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
/** TODO: these are specific to Ray */
18+
type Status = "SUCCEEDED" | "STOPPED" | "RUNNING" | "PENDING" | "ERROR"
19+
20+
export default Status

plugins/plugin-codeflare/src/tray/menus/profiles/active-runs.ts

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414
* limitations under the License.
1515
*/
1616

17-
import { basename } from "path"
1817
import { MenuItemConstructorOptions } from "electron"
1918
import { CreateWindowFunction } from "@kui-shell/core"
19+
import { productName } from "@kui-shell/client/config.d/name.json"
2020

2121
import { rayIcon } from "../../icons"
2222
import UpdateFunction from "../../update"
@@ -27,10 +27,10 @@ import { runMenuItems } from "./runs"
2727
/** active run watcher per profile */
2828
const watchers: Record<string, ProfileActiveRunWatcher> = {}
2929

30-
/** This is the utility function that will open a CodeFlare Dashboard window, pointing to the given local filesystem `logdir` */
31-
async function openDashboard(createWindow: CreateWindowFunction, logdir: string, follow = true) {
32-
await createWindow(["codeflare", "dashboard", follow ? "-f" : "", logdir], {
33-
title: "CodeFlare Dashboard - " + basename(logdir),
30+
/** This is the utility function that will open a CodeFlare Dashboard window and attaches to the given job */
31+
async function openDashboard(createWindow: CreateWindowFunction, profile: string, runId: string, follow = true) {
32+
await createWindow(["codeflare", "dashboard", follow ? "-f" : "", "-a", runId, "-p", profile], {
33+
title: productName + " Dashboard - " + runId,
3434
})
3535
}
3636

@@ -53,7 +53,7 @@ async function openMenuItem(this: CreateWindowFunction, profile: string, runId:
5353
// ^^^ disabled... due to the FIXME
5454

5555
// otherwise, we will need to start of a log aggregator
56-
process.env.NO_WAIT = "true" // don't wait for job termination
56+
/* process.env.NO_WAIT = "true" // don't wait for job termination
5757
process.env.QUIET_CONSOLE = "true" // don't tee logs to the console
5858
const { attach } = await import("../../../controller/attach")
5959
const { logdir, cleanExit } = await attach(profile, runId, { verbose: true })
@@ -72,7 +72,8 @@ async function openMenuItem(this: CreateWindowFunction, profile: string, runId:
7272
if (typeof cleanExit === "function") {
7373
cleanExit()
7474
}
75-
}
75+
} */
76+
await openDashboard(this, profile, runId)
7677
}
7778

7879
/** @return menu items that allow attaching to an active run in the given `profileName` */

plugins/plugin-codeflare/src/tray/watchers/profile/active-runs.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ export default class ProfileActiveRunWatcher {
3838
private readonly updateFn: UpdateFunction,
3939
private readonly profile: string,
4040
public readonly rayAddress = ProfileActiveRunWatcher.initWatcher(profile),
41-
private readonly timer = setInterval(() => this.updateActiveJobs(), 5000)
41+
private readonly timer = setInterval(() => this.updateActiveJobs(), 3000)
4242
) {
4343
onRun(() => this.updateActiveJobs())
4444
}

0 commit comments

Comments
 (0)