55and provide recommendations for optimization.
66"""
77
8+ import logging
89import statistics
910from dataclasses import dataclass
1011from typing import Any , Dict , List , Optional , Tuple
1516
1617from cratedb_toolkit .admin .xmover .util .database import CrateDBClient
1718
19+ logger = logging .getLogger (__name__ )
20+
1821
1922def format_storage_size (size_gb : float ) -> str :
2023 """Format storage size with appropriate units and spacing"""
@@ -134,7 +137,7 @@ def get_table_distribution_detailed(self, table_identifier: str) -> Optional[Tab
134137 AND s.recovery['files']['percent'] = 0
135138 GROUP BY s.schema_name, s.table_name, s.node['name']
136139 ORDER BY s.node['name'] \
137- """
140+ """ # noqa: E501
138141
139142 result = self .client .execute_query (query , [schema_name , table_name ])
140143 rows = result .get ("rows" , [])
@@ -190,7 +193,8 @@ def format_table_health_report(self, table_dist: TableDistribution) -> None:
190193 rprint (f"• Total Shards: { total_shards } ({ total_primary_shards } primary + { total_replica_shards } replica)" )
191194 rprint (f"• Total Documents: { total_documents :,} " )
192195 rprint (
193- f"• Node Coverage: { len (table_nodes )} /{ len (cluster_nodes )} nodes ({ len (table_nodes ) / len (cluster_nodes ) * 100 :.0f} %)"
196+ f"• Node Coverage: { len (table_nodes )} /{ len (cluster_nodes )} nodes "
197+ f"({ len (table_nodes ) / len (cluster_nodes ) * 100 :.0f} %)"
194198 )
195199
196200 if missing_nodes :
@@ -261,7 +265,8 @@ def format_table_health_report(self, table_dist: TableDistribution) -> None:
261265 # Storage distribution analysis
262266 if storage_cv > 0.4 :
263267 rprint (
264- f"• [red]⚠ Storage Imbalance:[/red] Range { format_storage_size (min_storage )} -{ format_storage_size (max_storage )} per node (CV: { storage_cv :.2f} )"
268+ f"• [red]⚠ Storage Imbalance:[/red] Range "
269+ f"{ format_storage_size (min_storage )} -{ format_storage_size (max_storage )} per node (CV: { storage_cv :.2f} )"
265270 )
266271 else :
267272 rprint (f"• [green]✓ Storage Balance:[/green] Well distributed (CV: { storage_cv :.2f} )" )
@@ -306,11 +311,13 @@ def format_table_health_report(self, table_dist: TableDistribution) -> None:
306311 for zone in sorted (zone_distribution .keys ()):
307312 zone_data = zone_distribution [zone ]
308313 rprint (
309- f"• { zone } : { zone_data ['nodes' ]} nodes, { zone_data ['shards' ]} shards, { format_storage_size (zone_data ['size' ])} "
314+ f"• { zone } : { zone_data ['nodes' ]} nodes, "
315+ f"{ zone_data ['shards' ]} shards, { format_storage_size (zone_data ['size' ])} "
310316 )
311317
312318 except Exception :
313- pass # Zone info not available
319+ # Zone info not available
320+ logger .exception ("Zone info not available" )
314321
315322 # Health Summary
316323 rprint ("\n [bold]💊 Health Summary[/bold]" )
@@ -377,7 +384,7 @@ def get_largest_tables_distribution(self, top_n: int = 10) -> List[TableDistribu
377384 AND s.recovery['files']['percent'] = 0
378385 GROUP BY s.schema_name, s.table_name, s.node['name']
379386 ORDER BY s.schema_name, s.table_name, s.node['name'] \
380- """
387+ """ # noqa: E501
381388
382389 result = self .client .execute_query (query , [top_n ])
383390
@@ -536,7 +543,8 @@ def detect_storage_imbalance(self, table: TableDistribution) -> Optional[Distrib
536543
537544 if overloaded_node and underloaded_node :
538545 recommendations .append (
539- f"Rebalance storage from { overloaded_node } ({ format_storage_size (max_size )} ) to { underloaded_node } ({ format_storage_size (min_size )} )"
546+ f"Rebalance storage from { overloaded_node } ({ format_storage_size (max_size )} ) "
547+ f"to { underloaded_node } ({ format_storage_size (min_size )} )"
540548 )
541549
542550 return DistributionAnomaly (
@@ -645,7 +653,7 @@ def detect_document_imbalance(self, table: TableDistribution) -> Optional[Distri
645653 recommendations = recommendations ,
646654 )
647655
648- def analyze_distribution (self , top_tables : int = 10 ) -> List [DistributionAnomaly ]:
656+ def analyze_distribution (self , top_tables : int = 10 ) -> Tuple [ List [DistributionAnomaly ], int ]:
649657 """Analyze shard distribution and return ranked anomalies"""
650658
651659 # Get table distributions
@@ -674,12 +682,13 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table
674682
675683 if not anomalies :
676684 rprint (
677- f"[green]✓ No significant shard distribution anomalies detected in top { tables_analyzed } tables![/green]"
685+ f"[green]✓ No significant shard distribution anomalies "
686+ f"detected in top { tables_analyzed } tables![/green]"
678687 )
679688 return
680689
681690 # Show analysis scope
682- unique_tables = set ( anomaly .table .full_table_name for anomaly in anomalies )
691+ unique_tables = { anomaly .table .full_table_name for anomaly in anomalies }
683692 rprint (
684693 f"[blue]📋 Analyzed { tables_analyzed } largest tables, found issues in { len (unique_tables )} tables[/blue]"
685694 )
@@ -733,7 +742,8 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table
733742 overloaded = [node for node , count in counts .items () if count == max_count ]
734743 underloaded = [node for node , count in counts .items () if count == min_count ]
735744 rprint (
736- f" [red]⚠ Issue:[/red] { overloaded [0 ]} has { max_count } shards while { underloaded [0 ]} has only { min_count } shards"
745+ f" [red]⚠ Issue:[/red] { overloaded [0 ]} has { max_count } shards "
746+ f"while { underloaded [0 ]} has only { min_count } shards"
737747 )
738748
739749 elif anomaly .anomaly_type == "Storage Imbalance" :
@@ -744,19 +754,20 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table
744754 overloaded = [node for node , size in sizes .items () if size == max_size ][0 ]
745755 underloaded = [node for node , size in sizes .items () if size == min_size ][0 ]
746756 rprint (
747- f" [red]⚠ Issue:[/red] Storage ranges from { format_storage_size (min_size )} ({ underloaded } ) to { format_storage_size (max_size )} ({ overloaded } ) - { max_size / min_size :.1f} x difference"
757+ f" [red]⚠ Issue:[/red] Storage ranges from { format_storage_size (min_size )} ({ underloaded } ) " # noqa: E501
758+ f"to { format_storage_size (max_size )} ({ overloaded } ) - { max_size / min_size :.1f} x difference"
748759 )
749760
750761 elif anomaly .anomaly_type == "Node Coverage Issue" :
751762 if "nodes_without_shards" in anomaly .details :
752763 missing_nodes = anomaly .details ["nodes_without_shards" ]
753764 coverage_ratio = anomaly .details ["coverage_ratio" ]
754765 rprint (
755- f" [red]⚠ Issue:[/red] Table missing from { len (missing_nodes )} nodes ({ coverage_ratio :.0%} cluster coverage)"
756- )
757- rprint (
758- f" [dim] Missing from: { ', ' .join (missing_nodes [:3 ])} { '...' if len (missing_nodes ) > 3 else '' } [/dim]"
766+ f" [red]⚠ Issue:[/red] Table missing from { len (missing_nodes )} nodes "
767+ f"({ coverage_ratio :.0%} cluster coverage)"
759768 )
769+ ellipsis = "..." if len (missing_nodes ) > 3 else ""
770+ rprint (f" [dim] Missing from: { ', ' .join (missing_nodes [:3 ])} { ellipsis } [/dim]" )
760771
761772 elif anomaly .anomaly_type == "Document Imbalance" :
762773 if "document_counts" in anomaly .details :
@@ -765,7 +776,8 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table
765776 max_docs = max (doc_counts .values ())
766777 ratio = max_docs / min_docs if min_docs > 0 else float ("inf" )
767778 rprint (
768- f" [red]⚠ Issue:[/red] Document counts range from { min_docs :,} to { max_docs :,} ({ ratio :.1f} x difference)"
779+ f" [red]⚠ Issue:[/red] Document counts range "
780+ f"from { min_docs :,} to { max_docs :,} ({ ratio :.1f} x difference)"
769781 )
770782
771783 # Show recommendations
@@ -774,7 +786,7 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table
774786 rprint (f" • { rec } " )
775787
776788 # Summary statistics
777- unique_tables = set ( anomaly .table .full_table_name for anomaly in anomalies )
789+ unique_tables = { anomaly .table .full_table_name for anomaly in anomalies }
778790 rprint ("\n [dim]📊 Analysis Summary:[/dim]" )
779791 rprint (f"[dim]• Tables analyzed: { tables_analyzed } [/dim]" )
780792 rprint (f"[dim]• Tables with issues: { len (unique_tables )} [/dim]" )
0 commit comments