@@ -20,9 +20,11 @@ use futures::stream::FuturesUnordered;
20
20
use gateway_client:: Client as MgsClient ;
21
21
use gateway_client:: types:: SpIdentifier ;
22
22
use gateway_client:: types:: SpIgnition ;
23
+ use gateway_types:: component:: SpType ;
23
24
use internal_dns_resolver:: Resolver ;
24
25
use internal_dns_types:: names:: ServiceName ;
25
26
use nexus_db_model:: Ereport ;
27
+ use nexus_db_model:: Sled ;
26
28
use nexus_db_model:: SupportBundle ;
27
29
use nexus_db_model:: SupportBundleState ;
28
30
use nexus_db_queries:: authz;
@@ -47,9 +49,11 @@ use omicron_uuid_kinds::SledUuid;
47
49
use omicron_uuid_kinds:: SupportBundleUuid ;
48
50
use omicron_uuid_kinds:: ZpoolUuid ;
49
51
use parallel_task_set:: ParallelTaskSet ;
52
+ use serde:: Serialize ;
50
53
use serde_json:: json;
51
54
use sha2:: { Digest , Sha256 } ;
52
55
use slog_error_chain:: InlineErrorChain ;
56
+ use std:: collections:: BTreeMap ;
53
57
use std:: future:: Future ;
54
58
use std:: io:: Write ;
55
59
use std:: num:: NonZeroU64 ;
@@ -61,6 +65,7 @@ use tokio::io::AsyncWriteExt;
61
65
use tokio:: io:: SeekFrom ;
62
66
use tokio_util:: task:: AbortOnDropHandle ;
63
67
use tufaceous_artifact:: ArtifactHash ;
68
+ use uuid:: Uuid ;
64
69
use zip:: ZipArchive ;
65
70
use zip:: ZipWriter ;
66
71
use zip:: write:: FullFileOptions ;
@@ -707,23 +712,44 @@ impl BundleCollection {
707
712
None
708
713
} ;
709
714
710
- let sp_dumps_dir = dir. path ( ) . join ( "sp_task_dumps" ) ;
711
- tokio:: fs:: create_dir_all ( & sp_dumps_dir) . await . with_context ( || {
712
- format ! ( "failed to create SP task dump directory {sp_dumps_dir}" )
713
- } ) ?;
714
- if let Err ( e) =
715
- save_all_sp_dumps ( log, & self . resolver , & sp_dumps_dir) . await
716
- {
717
- error ! ( log, "failed to capture SP task dumps" ; "error" => InlineErrorChain :: new( e. as_ref( ) ) ) ;
718
- } else {
719
- report. listed_sps = true ;
720
- } ;
721
-
722
- if let Ok ( all_sleds) = self
715
+ let all_sleds = self
723
716
. datastore
724
717
. sled_list_all_batched ( & self . opctx , SledFilter :: InService )
718
+ . await ;
719
+
720
+ if let Some ( mgs_client) = self . create_mgs_client ( ) . await {
721
+ if let Err ( e) = write_sled_info (
722
+ & self . log ,
723
+ & mgs_client,
724
+ all_sleds. as_deref ( ) . ok ( ) ,
725
+ dir. path ( ) ,
726
+ )
725
727
. await
726
- {
728
+ {
729
+ error ! ( log, "Failed to write sled_info.json" ; "error" => InlineErrorChain :: new( e. as_ref( ) ) ) ;
730
+ }
731
+
732
+ let sp_dumps_dir = dir. path ( ) . join ( "sp_task_dumps" ) ;
733
+ tokio:: fs:: create_dir_all ( & sp_dumps_dir) . await . with_context (
734
+ || {
735
+ format ! (
736
+ "Failed to create SP task dump directory {sp_dumps_dir}"
737
+ )
738
+ } ,
739
+ ) ?;
740
+
741
+ if let Err ( e) =
742
+ save_all_sp_dumps ( log, & mgs_client, & sp_dumps_dir) . await
743
+ {
744
+ error ! ( log, "Failed to capture SP task dumps" ; "error" => InlineErrorChain :: new( e. as_ref( ) ) ) ;
745
+ } else {
746
+ report. listed_sps = true ;
747
+ } ;
748
+ } else {
749
+ warn ! ( log, "No MGS client, skipping SP task dump collection" ) ;
750
+ }
751
+
752
+ if let Ok ( all_sleds) = all_sleds {
727
753
report. listed_in_service_sleds = true ;
728
754
729
755
const MAX_CONCURRENT_SLED_REQUESTS : usize = 16 ;
@@ -1031,6 +1057,23 @@ impl BundleCollection {
1031
1057
) ;
1032
1058
Ok ( ( ) )
1033
1059
}
1060
+
1061
+ async fn create_mgs_client ( & self ) -> Option < MgsClient > {
1062
+ match self
1063
+ . resolver
1064
+ . lookup_socket_v6 ( ServiceName :: ManagementGatewayService )
1065
+ . await
1066
+ . map ( |sockaddr| {
1067
+ let url = format ! ( "http://{}" , sockaddr) ;
1068
+ gateway_client:: Client :: new ( & url, self . log . clone ( ) )
1069
+ } ) {
1070
+ Ok ( r) => Some ( r) ,
1071
+ Err ( e) => {
1072
+ error ! ( self . log, "failed to resolve MGS address" ; "error" => InlineErrorChain :: new( & e) ) ;
1073
+ None
1074
+ }
1075
+ }
1076
+ }
1034
1077
}
1035
1078
1036
1079
impl BackgroundTask for SupportBundleCollector {
@@ -1316,18 +1359,9 @@ where
1316
1359
/// Collect task dumps from all SPs via MGS and save them to a directory.
1317
1360
async fn save_all_sp_dumps (
1318
1361
log : & slog:: Logger ,
1319
- resolver : & Resolver ,
1362
+ mgs_client : & MgsClient ,
1320
1363
sp_dumps_dir : & Utf8Path ,
1321
1364
) -> anyhow:: Result < ( ) > {
1322
- let mgs_client = resolver
1323
- . lookup_socket_v6 ( ServiceName :: ManagementGatewayService )
1324
- . await
1325
- . map ( |sockaddr| {
1326
- let url = format ! ( "http://{}" , sockaddr) ;
1327
- gateway_client:: Client :: new ( & url, log. clone ( ) )
1328
- } )
1329
- . context ( "failed to resolve address of MGS" ) ?;
1330
-
1331
1365
let available_sps = get_available_sps ( & mgs_client) . await ?;
1332
1366
1333
1367
let mut tasks = ParallelTaskSet :: new ( ) ;
@@ -1412,6 +1446,83 @@ async fn save_sp_dumps(
1412
1446
Ok ( ( ) )
1413
1447
}
1414
1448
1449
+ /// Write a file with a JSON mapping of sled serial numbers to cubby and UUIDs for easier
1450
+ /// identification of sleds present in a bundle.
1451
+ async fn write_sled_info (
1452
+ log : & slog:: Logger ,
1453
+ mgs_client : & MgsClient ,
1454
+ nexus_sleds : Option < & [ Sled ] > ,
1455
+ dir : & Utf8Path ,
1456
+ ) -> anyhow:: Result < ( ) > {
1457
+ #[ derive( Serialize ) ]
1458
+ struct SledInfo {
1459
+ cubby : Option < u16 > ,
1460
+ uuid : Option < Uuid > ,
1461
+ }
1462
+
1463
+ let available_sps = get_available_sps ( & mgs_client)
1464
+ . await
1465
+ . context ( "failed to get available SPs" ) ?;
1466
+
1467
+ // We can still get a useful mapping of cubby to serial using just the data from MGS.
1468
+ let mut nexus_map: BTreeMap < _ , _ > = nexus_sleds
1469
+ . unwrap_or_default ( )
1470
+ . into_iter ( )
1471
+ . map ( |sled| ( sled. serial_number ( ) , sled) )
1472
+ . collect ( ) ;
1473
+
1474
+ let mut sled_info = BTreeMap :: new ( ) ;
1475
+ for sp in
1476
+ available_sps. into_iter ( ) . filter ( |sp| matches ! ( sp. type_, SpType :: Sled ) )
1477
+ {
1478
+ let sp_state = match mgs_client. sp_get ( & sp. type_ , sp. slot ) . await {
1479
+ Ok ( s) => s. into_inner ( ) ,
1480
+ Err ( e) => {
1481
+ error ! ( log,
1482
+ "Failed to get SP state for sled_info.json" ;
1483
+ "cubby" => sp. slot,
1484
+ "component" => %sp. type_,
1485
+ "error" => InlineErrorChain :: new( & e)
1486
+ ) ;
1487
+ continue ;
1488
+ }
1489
+ } ;
1490
+
1491
+ if let Some ( sled) = nexus_map. remove ( sp_state. serial_number . as_str ( ) ) {
1492
+ sled_info. insert (
1493
+ sp_state. serial_number . to_string ( ) ,
1494
+ SledInfo {
1495
+ cubby : Some ( sp. slot ) ,
1496
+ uuid : Some ( * sled. identity . id . as_untyped_uuid ( ) ) ,
1497
+ } ,
1498
+ ) ;
1499
+ } else {
1500
+ sled_info. insert (
1501
+ sp_state. serial_number . to_string ( ) ,
1502
+ SledInfo { cubby : Some ( sp. slot ) , uuid : None } ,
1503
+ ) ;
1504
+ }
1505
+ }
1506
+
1507
+ // Sleds not returned by MGS.
1508
+ for ( serial, sled) in nexus_map {
1509
+ sled_info. insert (
1510
+ serial. to_string ( ) ,
1511
+ SledInfo {
1512
+ cubby : None ,
1513
+ uuid : Some ( * sled. identity . id . as_untyped_uuid ( ) ) ,
1514
+ } ,
1515
+ ) ;
1516
+ }
1517
+ let json_value = serde_json:: to_value ( sled_info)
1518
+ . context ( "failed to serialize sled info to JSON" ) ?;
1519
+ let json_str = serde_json:: to_string_pretty ( & json_value)
1520
+ . context ( "failed to pretty print sled info JSON" ) ?;
1521
+ tokio:: fs:: write ( dir. join ( "sled_info.json" ) , json_str) . await ?;
1522
+
1523
+ Ok ( ( ) )
1524
+ }
1525
+
1415
1526
fn is_fs_safe_single_path_component ( s : & str ) -> bool {
1416
1527
// Might be path traversal...
1417
1528
if s == "." || s == ".." {
0 commit comments