@@ -69,7 +69,9 @@ class InfrahubDockerCompose(DockerCompose):
69
69
deployment_type : str | None = None
70
70
71
71
@classmethod
72
- def init (cls , directory : Path | None = None , version : str | None = None , deployment_type : str | None = None ) -> Self :
72
+ def init (
73
+ cls , directory : Path | None = None , version : str | None = None , deployment_type : str | None = None
74
+ ) -> Self :
73
75
if not directory :
74
76
directory = Path .cwd ()
75
77
@@ -112,7 +114,9 @@ def generate_project_name(cls) -> str:
112
114
113
115
def create_docker_file (self , directory : Path ) -> Path :
114
116
current_directory = Path (__file__ ).resolve ().parent
115
- compose_file_name = "docker-compose-cluster.test.yml" if self .deployment_type == "cluster" else "docker-compose.test.yml"
117
+ compose_file_name = (
118
+ "docker-compose-cluster.test.yml" if self .deployment_type == "cluster" else "docker-compose.test.yml"
119
+ )
116
120
compose_file = current_directory / compose_file_name
117
121
118
122
test_compose_file = directory / "docker-compose.yml"
@@ -235,7 +239,7 @@ def database_create_backup(self, backup_name: str = "neo4j_database.backup", des
235
239
dest_dir / backup_name ,
236
240
)
237
241
238
- def database_restore_backup (self , backup_file : Path ) -> None :
242
+ def database_restore_backup (self , backup_file : Path ) -> None : # noqa: PLR0915
239
243
assert self .use_neo4j_enterprise
240
244
241
245
shutil .copy (
@@ -244,7 +248,7 @@ def database_restore_backup(self, backup_file: Path) -> None:
244
248
)
245
249
service_name = "database"
246
250
247
- if self .deployment_type != "cluster" :
251
+ if self .deployment_type != "cluster" : # noqa: PLR1702
248
252
try :
249
253
self .get_container (service_name = service_name )
250
254
except ContainerIsNotRunning :
@@ -351,8 +355,11 @@ def database_restore_backup(self, backup_file: Path) -> None:
351
355
352
356
self .exec_in_container (
353
357
command = [
354
- "neo4j-admin" , "database" , "restore" ,
355
- "--from-path" , str (self .internal_backup_dir / backup_file .name ),
358
+ "neo4j-admin" ,
359
+ "database" ,
360
+ "restore" ,
361
+ "--from-path" ,
362
+ str (self .internal_backup_dir / backup_file .name ),
356
363
"neo4j" ,
357
364
],
358
365
service_name = service_name ,
@@ -361,42 +368,125 @@ def database_restore_backup(self, backup_file: Path) -> None:
361
368
cmd = self .compose_command_property [:]
362
369
cmd += ["restart" , "database" ]
363
370
self ._run_command (cmd = cmd )
364
- # Waiting for cluster to stabilize...
365
- time .sleep (10 )
371
+
372
+ main_node = service_name
373
+ cluster_nodes = ["database" , "database-core2" , "database-core3" ]
374
+
375
+ for attempt in range (3 ):
376
+ try :
377
+ (stdout , _ , _ ) = self .exec_in_container (
378
+ command = [
379
+ "cypher-shell" ,
380
+ "--format" ,
381
+ "plain" ,
382
+ "-d" ,
383
+ "system" ,
384
+ "-u" ,
385
+ "neo4j" ,
386
+ "-p" ,
387
+ "admin" ,
388
+ "SHOW DATABASES YIELD name, address, currentStatus WHERE name = 'system' RETURN address, currentStatus" ,
389
+ ],
390
+ service_name = main_node ,
391
+ )
392
+ except Exception as e :
393
+ time .sleep (10 )
394
+ continue
395
+
396
+ raw_output = stdout
397
+ nodes_status = dict .fromkeys (cluster_nodes , False )
398
+ online_count = 0
399
+ total_entries = 0
400
+
401
+ try :
402
+ for line_raw in stdout .splitlines ():
403
+ line = line_raw .strip ()
404
+ if not line or line .startswith ("address" ):
405
+ continue
406
+
407
+ total_entries += 1
408
+ if "online" in line :
409
+ online_count += 1
410
+ for node in cluster_nodes :
411
+ node_pattern = f'"{ node } :'
412
+ if node_pattern in line :
413
+ nodes_status [node ] = True
414
+ break
415
+ if all (nodes_status .values ()) and online_count == len (cluster_nodes ):
416
+ break
417
+ except Exception as e :
418
+ print (f"Error parsing database status on attempt { attempt + 1 } : { e } " )
419
+
420
+ print (f"Waiting for all nodes to be online. Current status: { nodes_status } " )
421
+ time .sleep (5 )
422
+ else :
423
+ debug_logs = {}
424
+ for node in cluster_nodes :
425
+ try :
426
+ (logs , _ , _ ) = self .exec_in_container (
427
+ command = ["cat" , "logs/debug.log" ],
428
+ service_name = node ,
429
+ )
430
+ debug_logs [node ] = logs
431
+ except Exception as e :
432
+ debug_logs [node ] = f"Could not retrieve logs: { str (e )} "
433
+
434
+ debug_info = f"Raw output from SHOW DATABASES command:\n { raw_output } \n \n "
435
+ debug_info += f"Final node status: { nodes_status } \n \n "
436
+
437
+ status_str = ", " .join (
438
+ [f"{ node } : { 'online' if status else 'offline' } " for node , status in nodes_status .items ()]
439
+ )
440
+ logs_str = debug_info + "\n \n " .join (
441
+ [f"--- { node } logs ---\n { logs } " for node , logs in debug_logs .items ()]
442
+ )
443
+
444
+ raise Exception (
445
+ f"Failed to restore database cluster. Node status: { status_str } \n Debug logs:\n { logs_str } "
446
+ )
447
+
366
448
367
449
server_id = None
368
450
try :
369
451
stdout , _ , _ = self .exec_in_container (
370
452
command = [
371
- "cypher-shell" , "-d" , "system" , "-u" , "neo4j" , "-p" , "admin" ,
372
- 'SHOW SERVERS YIELD name, address WHERE address = "database:7687" RETURN name;'
453
+ "cypher-shell" ,
454
+ "--format" ,
455
+ "plain" ,
456
+ "-d" ,
457
+ "system" ,
458
+ "-u" ,
459
+ "neo4j" ,
460
+ "-p" ,
461
+ "admin" ,
462
+ 'SHOW SERVERS YIELD name, address WHERE address = "database:7687" RETURN name;' ,
373
463
],
374
464
service_name = service_name ,
375
465
)
376
- print (f"Server ID query result: { stdout } " )
377
466
378
- lines = [line .strip () for line in stdout .splitlines () if line .strip () and not line .startswith ("+" )]
379
- for line in lines :
380
- if line .startswith ('"' ) and line .endswith ('"' ):
381
- server_id = line .strip ('"' )
382
- break
467
+ lines = stdout .splitlines ()
468
+ for line_raw in lines :
469
+ line = line_raw .strip ()
470
+ if not line or line == "name" or line .startswith ("+" ):
471
+ continue
472
+ server_id = line .strip ('"' )
473
+ break
383
474
except Exception as e :
384
475
print (f"Error retrieving server ID with direct query: { e } " )
385
476
386
477
if server_id :
387
- print (f"Using server ID: { server_id } as seeder" )
388
478
self .exec_in_container (
389
479
command = [
390
- "cypher-shell" , "-d" , "system" , "-u" , "neo4j" , "-p" , "admin" ,
391
- f"CREATE DATABASE neo4j TOPOLOGY 3 PRIMARIES OPTIONS {{ existingData: 'use', existingDataSeedInstance: '{ server_id } ' }};"
480
+ "cypher-shell" ,
481
+ "-d" ,
482
+ "system" ,
483
+ "-u" ,
484
+ "neo4j" ,
485
+ "-p" ,
486
+ "admin" ,
487
+ f"CREATE DATABASE neo4j TOPOLOGY 3 PRIMARIES OPTIONS {{ existingData: 'use', existingDataSeedInstance: '{ server_id } ' }};" ,
392
488
],
393
489
service_name = service_name ,
394
490
)
395
-
396
- print ("Checking database status..." )
397
- self .exec_in_container (
398
- command = ["cypher-shell" , "-d" , "system" , "-u" , "neo4j" , "-p" , "admin" , "SHOW DATABASE neo4j;" ],
399
- service_name = service_name ,
400
- )
401
491
self .start ()
402
- print ("Database restored successfully" )
492
+ print ("Database restored successfully" )
0 commit comments