Skip to content

Commit 6a6b8f0

Browse files
committed
Admin/XMover: Code formatting. Linting. Type checking.
- More or less just line-length fixes.
- Only a single type adjustment was needed: the return type of the `analyze_distribution` method.
- Ruff recommended using set comprehensions, so here we go.
- At the single spot where an exception was silently `pass`ed, we added error output. Is it bad?
1 parent 055e8ac commit 6a6b8f0

File tree

1 file changed

+30
-18
lines changed
  • cratedb_toolkit/admin/xmover/analysis

1 file changed

+30
-18
lines changed

cratedb_toolkit/admin/xmover/analysis/table.py

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
and provide recommendations for optimization.
66
"""
77

8+
import logging
89
import statistics
910
from dataclasses import dataclass
1011
from typing import Any, Dict, List, Optional, Tuple
@@ -15,6 +16,8 @@
1516

1617
from cratedb_toolkit.admin.xmover.util.database import CrateDBClient
1718

19+
logger = logging.getLogger(__name__)
20+
1821

1922
def format_storage_size(size_gb: float) -> str:
2023
"""Format storage size with appropriate units and spacing"""
@@ -134,7 +137,7 @@ def get_table_distribution_detailed(self, table_identifier: str) -> Optional[Tab
134137
AND s.recovery['files']['percent'] = 0
135138
GROUP BY s.schema_name, s.table_name, s.node['name']
136139
ORDER BY s.node['name'] \
137-
"""
140+
""" # noqa: E501
138141

139142
result = self.client.execute_query(query, [schema_name, table_name])
140143
rows = result.get("rows", [])
@@ -190,7 +193,8 @@ def format_table_health_report(self, table_dist: TableDistribution) -> None:
190193
rprint(f"• Total Shards: {total_shards} ({total_primary_shards} primary + {total_replica_shards} replica)")
191194
rprint(f"• Total Documents: {total_documents:,}")
192195
rprint(
193-
f"• Node Coverage: {len(table_nodes)}/{len(cluster_nodes)} nodes ({len(table_nodes) / len(cluster_nodes) * 100:.0f}%)"
196+
f"• Node Coverage: {len(table_nodes)}/{len(cluster_nodes)} nodes "
197+
f"({len(table_nodes) / len(cluster_nodes) * 100:.0f}%)"
194198
)
195199

196200
if missing_nodes:
@@ -261,7 +265,8 @@ def format_table_health_report(self, table_dist: TableDistribution) -> None:
261265
# Storage distribution analysis
262266
if storage_cv > 0.4:
263267
rprint(
264-
f"• [red]⚠ Storage Imbalance:[/red] Range {format_storage_size(min_storage)}-{format_storage_size(max_storage)} per node (CV: {storage_cv:.2f})"
268+
f"• [red]⚠ Storage Imbalance:[/red] Range "
269+
f"{format_storage_size(min_storage)}-{format_storage_size(max_storage)} per node (CV: {storage_cv:.2f})"
265270
)
266271
else:
267272
rprint(f"• [green]✓ Storage Balance:[/green] Well distributed (CV: {storage_cv:.2f})")
@@ -306,11 +311,13 @@ def format_table_health_report(self, table_dist: TableDistribution) -> None:
306311
for zone in sorted(zone_distribution.keys()):
307312
zone_data = zone_distribution[zone]
308313
rprint(
309-
f"• {zone}: {zone_data['nodes']} nodes, {zone_data['shards']} shards, {format_storage_size(zone_data['size'])}"
314+
f"• {zone}: {zone_data['nodes']} nodes, "
315+
f"{zone_data['shards']} shards, {format_storage_size(zone_data['size'])}"
310316
)
311317

312318
except Exception:
313-
pass # Zone info not available
319+
# Zone info not available
320+
logger.exception("Zone info not available")
314321

315322
# Health Summary
316323
rprint("\n[bold]💊 Health Summary[/bold]")
@@ -377,7 +384,7 @@ def get_largest_tables_distribution(self, top_n: int = 10) -> List[TableDistribu
377384
AND s.recovery['files']['percent'] = 0
378385
GROUP BY s.schema_name, s.table_name, s.node['name']
379386
ORDER BY s.schema_name, s.table_name, s.node['name'] \
380-
"""
387+
""" # noqa: E501
381388

382389
result = self.client.execute_query(query, [top_n])
383390

@@ -536,7 +543,8 @@ def detect_storage_imbalance(self, table: TableDistribution) -> Optional[Distrib
536543

537544
if overloaded_node and underloaded_node:
538545
recommendations.append(
539-
f"Rebalance storage from {overloaded_node} ({format_storage_size(max_size)}) to {underloaded_node} ({format_storage_size(min_size)})"
546+
f"Rebalance storage from {overloaded_node} ({format_storage_size(max_size)}) "
547+
f"to {underloaded_node} ({format_storage_size(min_size)})"
540548
)
541549

542550
return DistributionAnomaly(
@@ -645,7 +653,7 @@ def detect_document_imbalance(self, table: TableDistribution) -> Optional[Distri
645653
recommendations=recommendations,
646654
)
647655

648-
def analyze_distribution(self, top_tables: int = 10) -> List[DistributionAnomaly]:
656+
def analyze_distribution(self, top_tables: int = 10) -> Tuple[List[DistributionAnomaly], int]:
649657
"""Analyze shard distribution and return ranked anomalies"""
650658

651659
# Get table distributions
@@ -674,12 +682,13 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table
674682

675683
if not anomalies:
676684
rprint(
677-
f"[green]✓ No significant shard distribution anomalies detected in top {tables_analyzed} tables![/green]"
685+
f"[green]✓ No significant shard distribution anomalies "
686+
f"detected in top {tables_analyzed} tables![/green]"
678687
)
679688
return
680689

681690
# Show analysis scope
682-
unique_tables = set(anomaly.table.full_table_name for anomaly in anomalies)
691+
unique_tables = {anomaly.table.full_table_name for anomaly in anomalies}
683692
rprint(
684693
f"[blue]📋 Analyzed {tables_analyzed} largest tables, found issues in {len(unique_tables)} tables[/blue]"
685694
)
@@ -733,7 +742,8 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table
733742
overloaded = [node for node, count in counts.items() if count == max_count]
734743
underloaded = [node for node, count in counts.items() if count == min_count]
735744
rprint(
736-
f" [red]⚠ Issue:[/red] {overloaded[0]} has {max_count} shards while {underloaded[0]} has only {min_count} shards"
745+
f" [red]⚠ Issue:[/red] {overloaded[0]} has {max_count} shards "
746+
f"while {underloaded[0]} has only {min_count} shards"
737747
)
738748

739749
elif anomaly.anomaly_type == "Storage Imbalance":
@@ -744,19 +754,20 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table
744754
overloaded = [node for node, size in sizes.items() if size == max_size][0]
745755
underloaded = [node for node, size in sizes.items() if size == min_size][0]
746756
rprint(
747-
f" [red]⚠ Issue:[/red] Storage ranges from {format_storage_size(min_size)} ({underloaded}) to {format_storage_size(max_size)} ({overloaded}) - {max_size / min_size:.1f}x difference"
757+
f" [red]⚠ Issue:[/red] Storage ranges from {format_storage_size(min_size)} ({underloaded}) " # noqa: E501
758+
f"to {format_storage_size(max_size)} ({overloaded}) - {max_size / min_size:.1f}x difference"
748759
)
749760

750761
elif anomaly.anomaly_type == "Node Coverage Issue":
751762
if "nodes_without_shards" in anomaly.details:
752763
missing_nodes = anomaly.details["nodes_without_shards"]
753764
coverage_ratio = anomaly.details["coverage_ratio"]
754765
rprint(
755-
f" [red]⚠ Issue:[/red] Table missing from {len(missing_nodes)} nodes ({coverage_ratio:.0%} cluster coverage)"
756-
)
757-
rprint(
758-
f" [dim] Missing from: {', '.join(missing_nodes[:3])}{'...' if len(missing_nodes) > 3 else ''}[/dim]"
766+
f" [red]⚠ Issue:[/red] Table missing from {len(missing_nodes)} nodes "
767+
f"({coverage_ratio:.0%} cluster coverage)"
759768
)
769+
ellipsis = "..." if len(missing_nodes) > 3 else ""
770+
rprint(f" [dim] Missing from: {', '.join(missing_nodes[:3])}{ellipsis}[/dim]")
760771

761772
elif anomaly.anomaly_type == "Document Imbalance":
762773
if "document_counts" in anomaly.details:
@@ -765,7 +776,8 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table
765776
max_docs = max(doc_counts.values())
766777
ratio = max_docs / min_docs if min_docs > 0 else float("inf")
767778
rprint(
768-
f" [red]⚠ Issue:[/red] Document counts range from {min_docs:,} to {max_docs:,} ({ratio:.1f}x difference)"
779+
f" [red]⚠ Issue:[/red] Document counts range "
780+
f"from {min_docs:,} to {max_docs:,} ({ratio:.1f}x difference)"
769781
)
770782

771783
# Show recommendations
@@ -774,7 +786,7 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table
774786
rprint(f" • {rec}")
775787

776788
# Summary statistics
777-
unique_tables = set(anomaly.table.full_table_name for anomaly in anomalies)
789+
unique_tables = {anomaly.table.full_table_name for anomaly in anomalies}
778790
rprint("\n[dim]📊 Analysis Summary:[/dim]")
779791
rprint(f"[dim]• Tables analyzed: {tables_analyzed}[/dim]")
780792
rprint(f"[dim]• Tables with issues: {len(unique_tables)}[/dim]")

0 commit comments

Comments
 (0)