@@ -69,7 +69,9 @@ class InfrahubDockerCompose(DockerCompose):
69
69
deployment_type : str | None = None
70
70
71
71
@classmethod
72
- def init (cls , directory : Path | None = None , version : str | None = None , deployment_type : str | None = None ) -> Self :
72
+ def init (
73
+ cls , directory : Path | None = None , version : str | None = None , deployment_type : str | None = None
74
+ ) -> Self :
73
75
if not directory :
74
76
directory = Path .cwd ()
75
77
@@ -112,7 +114,9 @@ def generate_project_name(cls) -> str:
112
114
113
115
def create_docker_file (self , directory : Path ) -> Path :
114
116
current_directory = Path (__file__ ).resolve ().parent
115
- compose_file_name = "docker-compose-cluster.test.yml" if self .deployment_type == "cluster" else "docker-compose.test.yml"
117
+ compose_file_name = (
118
+ "docker-compose-cluster.test.yml" if self .deployment_type == "cluster" else "docker-compose.test.yml"
119
+ )
116
120
compose_file = current_directory / compose_file_name
117
121
118
122
test_compose_file = directory / "docker-compose.yml"
@@ -235,7 +239,7 @@ def database_create_backup(self, backup_name: str = "neo4j_database.backup", des
235
239
dest_dir / backup_name ,
236
240
)
237
241
238
- def database_restore_backup (self , backup_file : Path ) -> None :
242
+ def database_restore_backup (self , backup_file : Path ) -> None : # noqa: PLR0915
239
243
assert self .use_neo4j_enterprise
240
244
241
245
shutil .copy (
@@ -244,7 +248,7 @@ def database_restore_backup(self, backup_file: Path) -> None:
244
248
)
245
249
service_name = "database"
246
250
247
- if self .deployment_type != "cluster" :
251
+ if self .deployment_type != "cluster" : # noqa: PLR1702
248
252
try :
249
253
self .get_container (service_name = service_name )
250
254
except ContainerIsNotRunning :
@@ -351,8 +355,11 @@ def database_restore_backup(self, backup_file: Path) -> None:
351
355
352
356
self .exec_in_container (
353
357
command = [
354
- "neo4j-admin" , "database" , "restore" ,
355
- "--from-path" , str (self .internal_backup_dir / backup_file .name ),
358
+ "neo4j-admin" ,
359
+ "database" ,
360
+ "restore" ,
361
+ "--from-path" ,
362
+ str (self .internal_backup_dir / backup_file .name ),
356
363
"neo4j" ,
357
364
],
358
365
service_name = service_name ,
@@ -361,42 +368,124 @@ def database_restore_backup(self, backup_file: Path) -> None:
361
368
cmd = self .compose_command_property [:]
362
369
cmd += ["restart" , "database" ]
363
370
self ._run_command (cmd = cmd )
364
- # Waiting for cluster to stabilize...
365
- time .sleep (10 )
371
+
372
+ main_node = service_name
373
+ cluster_nodes = ["database" , "database-core2" , "database-core3" ]
374
+
375
+ for attempt in range (3 ):
376
+ if attempt > 0 :
377
+ time .sleep (10 )
378
+
379
+ (stdout , _ , _ ) = self .exec_in_container (
380
+ command = [
381
+ "cypher-shell" ,
382
+ "--format" ,
383
+ "plain" ,
384
+ "-d" ,
385
+ "system" ,
386
+ "-u" ,
387
+ "neo4j" ,
388
+ "-p" ,
389
+ "admin" ,
390
+ "SHOW DATABASES YIELD name, address, currentStatus WHERE name = 'system' RETURN address, currentStatus" ,
391
+ ],
392
+ service_name = main_node ,
393
+ )
394
+
395
+ raw_output = stdout
396
+ nodes_status = dict .fromkeys (cluster_nodes , False )
397
+ online_count = 0
398
+ total_entries = 0
399
+
400
+ try :
401
+ for line_raw in stdout .splitlines ():
402
+ line = line_raw .strip ()
403
+ if not line or line .startswith ("address" ):
404
+ continue
405
+
406
+ total_entries += 1
407
+ if "online" in line :
408
+ online_count += 1
409
+ for node in cluster_nodes :
410
+ node_pattern = f'"{ node } :'
411
+ if node_pattern in line :
412
+ nodes_status [node ] = True
413
+ break
414
+ if all (nodes_status .values ()) and online_count == len (cluster_nodes ):
415
+ break
416
+ except Exception as e :
417
+ print (f"Error parsing database status on attempt { attempt + 1 } : { e } " )
418
+
419
+ print (f"Waiting for all nodes to be online. Current status: { nodes_status } " )
420
+ time .sleep (5 )
421
+ else :
422
+ debug_logs = {}
423
+ for node in cluster_nodes :
424
+ try :
425
+ (logs , _ , _ ) = self .exec_in_container (
426
+ command = ["cat" , "logs/debug.log" ],
427
+ service_name = node ,
428
+ )
429
+ debug_logs [node ] = logs
430
+ except Exception as e :
431
+ debug_logs [node ] = f"Could not retrieve logs: { str (e )} "
432
+
433
+ debug_info = f"Raw output from SHOW DATABASES command:\n { raw_output } \n \n "
434
+ debug_info += f"Final node status: { nodes_status } \n \n "
435
+
436
+ status_str = ", " .join (
437
+ [f"{ node } : { 'online' if status else 'offline' } " for node , status in nodes_status .items ()]
438
+ )
439
+ logs_str = debug_info + "\n \n " .join (
440
+ [f"--- { node } logs ---\n { logs } " for node , logs in debug_logs .items ()]
441
+ )
442
+
443
+ raise Exception (
444
+ f"Failed to restore database cluster. Node status: { status_str } \n Debug logs:\n { logs_str } "
445
+ )
446
+
366
447
367
448
server_id = None
368
449
try :
369
450
stdout , _ , _ = self .exec_in_container (
370
451
command = [
371
- "cypher-shell" , "-d" , "system" , "-u" , "neo4j" , "-p" , "admin" ,
372
- 'SHOW SERVERS YIELD name, address WHERE address = "database:7687" RETURN name;'
452
+ "cypher-shell" ,
453
+ "--format" ,
454
+ "plain" ,
455
+ "-d" ,
456
+ "system" ,
457
+ "-u" ,
458
+ "neo4j" ,
459
+ "-p" ,
460
+ "admin" ,
461
+ 'SHOW SERVERS YIELD name, address WHERE address = "database:7687" RETURN name;' ,
373
462
],
374
463
service_name = service_name ,
375
464
)
376
- print (f"Server ID query result: { stdout } " )
377
465
378
- lines = [line .strip () for line in stdout .splitlines () if line .strip () and not line .startswith ("+" )]
379
- for line in lines :
380
- if line .startswith ('"' ) and line .endswith ('"' ):
381
- server_id = line .strip ('"' )
382
- break
466
+ lines = stdout .splitlines ()
467
+ for line_raw in lines :
468
+ line = line_raw .strip ()
469
+ if not line or line == "name" or line .startswith ("+" ):
470
+ continue
471
+ server_id = line .strip ('"' )
472
+ break
383
473
except Exception as e :
384
474
print (f"Error retrieving server ID with direct query: { e } " )
385
475
386
476
if server_id :
387
- print (f"Using server ID: { server_id } as seeder" )
388
477
self .exec_in_container (
389
478
command = [
390
- "cypher-shell" , "-d" , "system" , "-u" , "neo4j" , "-p" , "admin" ,
391
- f"CREATE DATABASE neo4j TOPOLOGY 3 PRIMARIES OPTIONS {{ existingData: 'use', existingDataSeedInstance: '{ server_id } ' }};"
479
+ "cypher-shell" ,
480
+ "-d" ,
481
+ "system" ,
482
+ "-u" ,
483
+ "neo4j" ,
484
+ "-p" ,
485
+ "admin" ,
486
+ f"CREATE DATABASE neo4j TOPOLOGY 3 PRIMARIES OPTIONS {{ existingData: 'use', existingDataSeedInstance: '{ server_id } ' }};" ,
392
487
],
393
488
service_name = service_name ,
394
489
)
395
-
396
- print ("Checking database status..." )
397
- self .exec_in_container (
398
- command = ["cypher-shell" , "-d" , "system" , "-u" , "neo4j" , "-p" , "admin" , "SHOW DATABASE neo4j;" ],
399
- service_name = service_name ,
400
- )
401
490
self .start ()
402
- print ("Database restored successfully" )
491
+ print ("Database restored successfully" )
0 commit comments