Skip to content

Commit 23c2038

Browse files
committed
Add mapping of cubby to sled ID to support bundles
Currently, support bundles contain the sled UUID in file paths, and the serial number in `sled.txt`. However, when accessing a sled via the tech port, we generally refer to it by its cubby. To make identification of sleds simpler, add a new `sled_info.json` file to the bundle with a JSON-encoded mapping of sled serial number to cubby and UUID.
1 parent 76ec1fa commit 23c2038

File tree

3 files changed

+137
-24
lines changed

3 files changed

+137
-24
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

nexus/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ dropshot.workspace = true
3737
fatfs.workspace = true
3838
futures.workspace = true
3939
gateway-client.workspace = true
40+
gateway-types.workspace = true
4041
headers.workspace = true
4142
hex.workspace = true
4243
hickory-resolver.workspace = true

nexus/src/app/background/tasks/support_bundle_collector.rs

Lines changed: 135 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,11 @@ use futures::stream::FuturesUnordered;
2020
use gateway_client::Client as MgsClient;
2121
use gateway_client::types::SpIdentifier;
2222
use gateway_client::types::SpIgnition;
23+
use gateway_types::component::SpType;
2324
use internal_dns_resolver::Resolver;
2425
use internal_dns_types::names::ServiceName;
2526
use nexus_db_model::Ereport;
27+
use nexus_db_model::Sled;
2628
use nexus_db_model::SupportBundle;
2729
use nexus_db_model::SupportBundleState;
2830
use nexus_db_queries::authz;
@@ -47,9 +49,11 @@ use omicron_uuid_kinds::SledUuid;
4749
use omicron_uuid_kinds::SupportBundleUuid;
4850
use omicron_uuid_kinds::ZpoolUuid;
4951
use parallel_task_set::ParallelTaskSet;
52+
use serde::Serialize;
5053
use serde_json::json;
5154
use sha2::{Digest, Sha256};
5255
use slog_error_chain::InlineErrorChain;
56+
use std::collections::BTreeMap;
5357
use std::future::Future;
5458
use std::io::Write;
5559
use std::num::NonZeroU64;
@@ -61,6 +65,7 @@ use tokio::io::AsyncWriteExt;
6165
use tokio::io::SeekFrom;
6266
use tokio_util::task::AbortOnDropHandle;
6367
use tufaceous_artifact::ArtifactHash;
68+
use uuid::Uuid;
6469
use zip::ZipArchive;
6570
use zip::ZipWriter;
6671
use zip::write::FullFileOptions;
@@ -707,23 +712,44 @@ impl BundleCollection {
707712
None
708713
};
709714

710-
let sp_dumps_dir = dir.path().join("sp_task_dumps");
711-
tokio::fs::create_dir_all(&sp_dumps_dir).await.with_context(|| {
712-
format!("failed to create SP task dump directory {sp_dumps_dir}")
713-
})?;
714-
if let Err(e) =
715-
save_all_sp_dumps(log, &self.resolver, &sp_dumps_dir).await
716-
{
717-
error!(log, "failed to capture SP task dumps"; "error" => InlineErrorChain::new(e.as_ref()));
718-
} else {
719-
report.listed_sps = true;
720-
};
721-
722-
if let Ok(all_sleds) = self
715+
let all_sleds = self
723716
.datastore
724717
.sled_list_all_batched(&self.opctx, SledFilter::InService)
718+
.await;
719+
720+
if let Some(mgs_client) = self.create_mgs_client().await {
721+
if let Err(e) = write_sled_info(
722+
&self.log,
723+
&mgs_client,
724+
all_sleds.as_deref().ok(),
725+
dir.path(),
726+
)
725727
.await
726-
{
728+
{
729+
error!(log, "Failed to write sled_info.json"; "error" => InlineErrorChain::new(e.as_ref()));
730+
}
731+
732+
let sp_dumps_dir = dir.path().join("sp_task_dumps");
733+
tokio::fs::create_dir_all(&sp_dumps_dir).await.with_context(
734+
|| {
735+
format!(
736+
"Failed to create SP task dump directory {sp_dumps_dir}"
737+
)
738+
},
739+
)?;
740+
741+
if let Err(e) =
742+
save_all_sp_dumps(log, &mgs_client, &sp_dumps_dir).await
743+
{
744+
error!(log, "Failed to capture SP task dumps"; "error" => InlineErrorChain::new(e.as_ref()));
745+
} else {
746+
report.listed_sps = true;
747+
};
748+
} else {
749+
warn!(log, "No MGS client, skipping SP task dump collection");
750+
}
751+
752+
if let Ok(all_sleds) = all_sleds {
727753
report.listed_in_service_sleds = true;
728754

729755
const MAX_CONCURRENT_SLED_REQUESTS: usize = 16;
@@ -1031,6 +1057,23 @@ impl BundleCollection {
10311057
);
10321058
Ok(())
10331059
}
1060+
1061+
async fn create_mgs_client(&self) -> Option<MgsClient> {
1062+
match self
1063+
.resolver
1064+
.lookup_socket_v6(ServiceName::ManagementGatewayService)
1065+
.await
1066+
.map(|sockaddr| {
1067+
let url = format!("http://{}", sockaddr);
1068+
gateway_client::Client::new(&url, self.log.clone())
1069+
}) {
1070+
Ok(r) => Some(r),
1071+
Err(e) => {
1072+
error!(self.log, "failed to resolve MGS address"; "error" => InlineErrorChain::new(&e));
1073+
None
1074+
}
1075+
}
1076+
}
10341077
}
10351078

10361079
impl BackgroundTask for SupportBundleCollector {
@@ -1316,18 +1359,9 @@ where
13161359
/// Collect task dumps from all SPs via MGS and save them to a directory.
13171360
async fn save_all_sp_dumps(
13181361
log: &slog::Logger,
1319-
resolver: &Resolver,
1362+
mgs_client: &MgsClient,
13201363
sp_dumps_dir: &Utf8Path,
13211364
) -> anyhow::Result<()> {
1322-
let mgs_client = resolver
1323-
.lookup_socket_v6(ServiceName::ManagementGatewayService)
1324-
.await
1325-
.map(|sockaddr| {
1326-
let url = format!("http://{}", sockaddr);
1327-
gateway_client::Client::new(&url, log.clone())
1328-
})
1329-
.context("failed to resolve address of MGS")?;
1330-
13311365
let available_sps = get_available_sps(&mgs_client).await?;
13321366

13331367
let mut tasks = ParallelTaskSet::new();
@@ -1412,6 +1446,83 @@ async fn save_sp_dumps(
14121446
Ok(())
14131447
}
14141448

1449+
/// Write a file with a JSON mapping of sled serial numbers to cubby and UUIDs for easier
1450+
/// identification of sleds present in a bundle.
1451+
async fn write_sled_info(
1452+
log: &slog::Logger,
1453+
mgs_client: &MgsClient,
1454+
nexus_sleds: Option<&[Sled]>,
1455+
dir: &Utf8Path,
1456+
) -> anyhow::Result<()> {
1457+
#[derive(Serialize)]
1458+
struct SledInfo {
1459+
cubby: Option<u16>,
1460+
uuid: Option<Uuid>,
1461+
}
1462+
1463+
let available_sps = get_available_sps(&mgs_client)
1464+
.await
1465+
.context("failed to get available SPs")?;
1466+
1467+
// We can still get a useful mapping of cubby to serial using just the data from MGS.
1468+
let mut nexus_map: BTreeMap<_, _> = nexus_sleds
1469+
.unwrap_or_default()
1470+
.into_iter()
1471+
.map(|sled| (sled.serial_number(), sled))
1472+
.collect();
1473+
1474+
let mut sled_info = BTreeMap::new();
1475+
for sp in
1476+
available_sps.into_iter().filter(|sp| matches!(sp.type_, SpType::Sled))
1477+
{
1478+
let sp_state = match mgs_client.sp_get(&sp.type_, sp.slot).await {
1479+
Ok(s) => s.into_inner(),
1480+
Err(e) => {
1481+
error!(log,
1482+
"Failed to get SP state for sled_info.json";
1483+
"cubby" => sp.slot,
1484+
"component" => %sp.type_,
1485+
"error" => InlineErrorChain::new(&e)
1486+
);
1487+
continue;
1488+
}
1489+
};
1490+
1491+
if let Some(sled) = nexus_map.remove(sp_state.serial_number.as_str()) {
1492+
sled_info.insert(
1493+
sp_state.serial_number.to_string(),
1494+
SledInfo {
1495+
cubby: Some(sp.slot),
1496+
uuid: Some(*sled.identity.id.as_untyped_uuid()),
1497+
},
1498+
);
1499+
} else {
1500+
sled_info.insert(
1501+
sp_state.serial_number.to_string(),
1502+
SledInfo { cubby: Some(sp.slot), uuid: None },
1503+
);
1504+
}
1505+
}
1506+
1507+
// Sleds not returned by MGS.
1508+
for (serial, sled) in nexus_map {
1509+
sled_info.insert(
1510+
serial.to_string(),
1511+
SledInfo {
1512+
cubby: None,
1513+
uuid: Some(*sled.identity.id.as_untyped_uuid()),
1514+
},
1515+
);
1516+
}
1517+
let json_value = serde_json::to_value(sled_info)
1518+
.context("failed to serialize sled info to JSON")?;
1519+
let json_str = serde_json::to_string_pretty(&json_value)
1520+
.context("failed to pretty print sled info JSON")?;
1521+
tokio::fs::write(dir.join("sled_info.json"), json_str).await?;
1522+
1523+
Ok(())
1524+
}
1525+
14151526
fn is_fs_safe_single_path_component(s: &str) -> bool {
14161527
// Might be path traversal...
14171528
if s == "." || s == ".." {

0 commit comments

Comments
 (0)