diff --git a/.gitattributes b/.gitattributes index d7735378..55fec52f 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ *.onnx filter=lfs diff=lfs merge=lfs -text *.engine filter=lfs diff=lfs merge=lfs -text *.plan filter=lfs diff=lfs merge=lfs -text +docs/Vocabulary/ORBvoc.txt filter=lfs diff=lfs merge=lfs -text diff --git a/app/backend/node_manager.py b/app/backend/node_manager.py index aa36b650..bb5b8824 100644 --- a/app/backend/node_manager.py +++ b/app/backend/node_manager.py @@ -31,13 +31,21 @@ from tool.ros2_node_manager import Ros2NodeManager -_REALSENSE_SCRIPT = '/tinynav/scripts/run_realsense_sensor.sh' -_VENV_SITE = '/tinynav/.venv/lib/python3.10/site-packages' +_DEFAULT_TINYNAV_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')) +_TINYNAV_ROOT = os.environ.get('TINYNAV_ROOT', _DEFAULT_TINYNAV_ROOT) +_LOCAL_PREFIX = os.environ.get('LOCAL_PREFIX', '/userdata/local') +_DEVICE_VENV = os.environ.get('TINYNAV_VENV', '/userdata/junlinp/venv') +_DEFAULT_LOG_DIR = os.environ.get('TINYNAV_LOG_DIR', '/userdata/junlinp/logs') +_REALSENSE_SCRIPT = os.path.join(_TINYNAV_ROOT, 'scripts', 'run_realsense_sensor.sh') +_VENV_SITE = os.path.join(_TINYNAV_ROOT, '.venv', 'lib', 'python3.10', 'site-packages') _MAP_BUILD_DOMAIN_LOOPER = '231' # isolated domain to avoid live looper topic collision during map build +_ROS2_NODE_LIST_TIMEOUT = float(os.environ.get('TINYNAV_ROS2_NODE_LIST_TIMEOUT', '8')) +_ROS2_NODE_LIST_RETRIES = int(os.environ.get('TINYNAV_ROS2_NODE_LIST_RETRIES', '3')) # build_map_node.py emits "MAPPING_PERCENT:" lines on stdout so the # parent process can track progress without a separate bridge subprocess. _MAPPING_PERCENT_PREFIX = 'MAPPING_PERCENT:' +_ENABLE_CMD_VEL_NODE = os.environ.get('TINYNAV_ENABLE_CMD_VEL_NODE', '0') == '1' _COLOR_TOPIC_REALSENSE = '/camera/camera/color/image_raw' _COLOR_TOPIC_LOOPER = '/camera/camera/color/image_rect_raw/compressed' @@ -61,7 +69,9 @@ class BackendNode(Ros2NodeManager): """Ros2NodeManager + subscriptions needed by the HTTP/WS layer.""" - def __init__(self, tinynav_db_path: str = '/tinynav/tinynav_db'): + def __init__(self, tinynav_db_path: str | None = None): + if tinynav_db_path is None: + tinynav_db_path = os.path.join(_TINYNAV_ROOT, 'tinynav_db') super().__init__(tinynav_db_path=tinynav_db_path) self._lock = threading.Lock() @@ -307,9 +317,23 @@ def _detect_and_init_sensor(self): domain = os.environ.get('ROS_DOMAIN_ID', '0') self.get_logger().info(f'BackendNode ROS_DOMAIN_ID={domain}') try: - result = subprocess.run( - ['ros2', 'node', 'list'], capture_output=True, text=True, timeout=3 - ) + result = None + last_err = None + for _ in range(max(1, _ROS2_NODE_LIST_RETRIES)): + try: + result = subprocess.run( + ['ros2', 'node', 'list'], + capture_output=True, + text=True, + timeout=_ROS2_NODE_LIST_TIMEOUT, + ) + break + except Exception as e: + last_err = e + time.sleep(1.0) + if result is None: + raise RuntimeError(f'ros2 node list failed after retries: {last_err}') + if '/insight_full' in result.stdout.splitlines(): self._sensor_mode = 'looper' self.get_logger().info('Sensor mode: looper — launching looper bridge + planning') @@ -450,7 +474,7 @@ def _start_unitree_if_configured(self): _env['PYTHONPATH'] = _VENV_SITE + ':' + _env.get('PYTHONPATH', '') self._unitree_proc = self._launch_proc( 'unitree', - ['uv', 'run', 'python', '/tinynav/tinynav/platforms/unitree_control.py'], + ['python3', os.path.join(_TINYNAV_ROOT, 'tinynav/platforms/unitree_control.py')], env=_env, ) self.get_logger().info('unitree_control started') @@ -536,57 +560,87 @@ def _make_log(self, name: str): """Open a timestamped log file under tinynav_db/logs/. Safe to close in parent after Popen — the child process inherits its own fd copy at fork time.""" from datetime import datetime - logs_dir = os.path.join(self.tinynav_db_path, 'logs') + logs_dir = _DEFAULT_LOG_DIR os.makedirs(logs_dir, exist_ok=True) ts = datetime.now().strftime('%Y_%m_%d_%H_%M_%S') path = os.path.join(logs_dir, f'{ts}_{name}.txt') return open(path, 'w') def _launch_proc(self, name: str, cmd: list[str], env: dict | None = None, - cwd: str = '/tinynav') -> subprocess.Popen: + cwd: str = _TINYNAV_ROOT) -> subprocess.Popen: """Spawn a subprocess with standard logging and process-group setup.""" lf = self._make_log(name) + run_env = self._normalize_run_env(env) proc = subprocess.Popen( cmd, preexec_fn=os.setsid, cwd=cwd, - env=env or os.environ.copy(), + env=run_env, stdout=lf, stderr=subprocess.STDOUT, ) lf.close() return proc + def _normalize_run_env(self, env: dict | None) -> dict: + run_env = (env or os.environ.copy()).copy() + # Ensure in-repo modules and compiled extension (tinynav_cpp_bind) are importable. + run_env['PYTHONPATH'] = f"{_TINYNAV_ROOT}:" + run_env.get('PYTHONPATH', '') + if os.path.exists(os.path.join(_DEVICE_VENV, 'bin', 'python3')): + run_env['PATH'] = f"{_DEVICE_VENV}/bin:" + run_env.get('PATH', '') + run_env['VIRTUAL_ENV'] = _DEVICE_VENV + run_env.setdefault('TMPDIR', '/userdata/tmp') + run_env.setdefault('TEMP', run_env['TMPDIR']) + run_env.setdefault('TMP', run_env['TMPDIR']) + run_env['PATH'] = f"{_LOCAL_PREFIX}/bin:" + run_env.get('PATH', '') + # Keep /userdata/junlinp/local first so pydbow3 resolves against rebuilt DBoW3. + run_env['LD_LIBRARY_PATH'] = ( + f"{_LOCAL_PREFIX}/lib:/userdata/junlinp/local/lib:/userdata/opencv-release/lib:" + + run_env.get('LD_LIBRARY_PATH', '') + ) + run_env['CMAKE_PREFIX_PATH'] = f"{_LOCAL_PREFIX}:" + run_env.get('CMAKE_PREFIX_PATH', '') + run_env['PKG_CONFIG_PATH'] = f"{_LOCAL_PREFIX}/lib/pkgconfig:" + run_env.get('PKG_CONFIG_PATH', '') + return run_env + def _stop_sensor_procs(self): for attr in ('_looper_bridge_proc', '_realsense_proc', '_perception_proc', '_planning_proc'): self._kill_proc(getattr(self, attr)) setattr(self, attr, None) + @staticmethod + def _proc_alive(proc: subprocess.Popen | None) -> bool: + return proc is not None and proc.poll() is None + def _launch_sensor_procs(self, env: dict): """Start sensor procs based on current _sensor_mode.""" if self._sensor_mode == 'looper': - self._looper_bridge_proc = self._launch_proc( - 'looper_bridge', - ['uv', 'run', 'python', '/tinynav/tool/looper_bridge_node.py'], - env=env, - ) - self._planning_proc = self._launch_proc( - 'planning', - ['uv', 'run', 'python', '/tinynav/tinynav/core/planning_node.py'], - env=env, - ) + if not self._proc_alive(self._looper_bridge_proc): + self._looper_bridge_proc = self._launch_proc( + 'looper_bridge', + ['python3', os.path.join(_TINYNAV_ROOT, 'tool/looper_bridge_node.py')], + env=env, + ) + if not self._proc_alive(self._planning_proc): + self._planning_proc = self._launch_proc( + 'planning', + ['python3', os.path.join(_TINYNAV_ROOT, 'tinynav/core/planning_node.py')], + env=env, + ) elif self._sensor_mode == 'realsense': - self._realsense_proc = self._launch_proc( - 'realsense', - ['bash', _REALSENSE_SCRIPT], - ) - self._perception_proc = self._launch_proc( - 'perception', - ['uv', 'run', 'python', '/tinynav/tinynav/core/perception_node.py'], - env=env, - ) - self._planning_proc = self._launch_proc( - 'planning', - ['uv', 'run', 'python', '/tinynav/tinynav/core/planning_node.py'], - env=env, - ) + if not self._proc_alive(self._realsense_proc): + self._realsense_proc = self._launch_proc( + 'realsense', + ['bash', _REALSENSE_SCRIPT], + ) + if not self._proc_alive(self._perception_proc): + self._perception_proc = self._launch_proc( + 'perception', + ['python3', os.path.join(_TINYNAV_ROOT, 'tinynav/core/perception_node.py')], + env=env, + ) + if not self._proc_alive(self._planning_proc): + self._planning_proc = self._launch_proc( + 'planning', + ['python3', os.path.join(_TINYNAV_ROOT, 'tinynav/core/planning_node.py')], + env=env, + ) def _restart_sensor_procs(self): _env = os.environ.copy() @@ -604,16 +658,22 @@ def cmd_start_nav_nodes(self): self._map_node_proc = self._launch_proc( 'map_node', [ - 'uv', 'run', 'python', '/tinynav/tinynav/core/map_node.py', + 'python3', os.path.join(_TINYNAV_ROOT, 'tinynav/core/map_node.py'), '--tinynav_map_path', self.map_path, + '--loop-closure-mode', 'bow', + '--loop-closure-use-bow', + '--dbow3-vocabulary-path', os.path.join(_TINYNAV_ROOT, 'docs/Vocabulary/ORBvoc.txt'), ], env=_env, ) - self._cmd_vel_proc = self._launch_proc( - 'cmd_vel_control', - ['uv', 'run', 'python', '/tinynav/tinynav/platforms/cmd_vel_control.py'], - env=_env, - ) + if _ENABLE_CMD_VEL_NODE: + self._cmd_vel_proc = self._launch_proc( + 'cmd_vel_control', + ['python3', os.path.join(_TINYNAV_ROOT, 'tinynav/platforms/cmd_vel_control.py')], + env=_env, + ) + else: + self._cmd_vel_proc = None with self._lock: self._nav_nodes_running = True self.get_logger().info('Nav nodes started') @@ -645,20 +705,26 @@ def cmd_restart_nav_nodes(self): self._planning_proc = self._launch_proc( 'planning', - ['uv', 'run', 'python', '/tinynav/tinynav/core/planning_node.py'], + ['python3', os.path.join(_TINYNAV_ROOT, 'tinynav/core/planning_node.py')], env=_env, ) self._map_node_proc = self._launch_proc( 'map_node', - ['uv', 'run', 'python', '/tinynav/tinynav/core/map_node.py', - '--tinynav_map_path', self.map_path], - env=_env, - ) - self._cmd_vel_proc = self._launch_proc( - 'cmd_vel_control', - ['uv', 'run', 'python', '/tinynav/tinynav/platforms/cmd_vel_control.py'], + ['python3', os.path.join(_TINYNAV_ROOT, 'tinynav/core/map_node.py'), + '--tinynav_map_path', self.map_path, + '--loop-closure-mode', 'bow', + '--loop-closure-use-bow', + '--dbow3-vocabulary-path', os.path.join(_TINYNAV_ROOT, 'docs/Vocabulary/ORBvoc.txt')], env=_env, ) + if _ENABLE_CMD_VEL_NODE: + self._cmd_vel_proc = self._launch_proc( + 'cmd_vel_control', + ['python3', os.path.join(_TINYNAV_ROOT, 'tinynav/platforms/cmd_vel_control.py')], + env=_env, + ) + else: + self._cmd_vel_proc = None with self._lock: self._nav_nodes_running = True self._localized = False @@ -740,17 +806,26 @@ def _start_rosbag_build_map(self): if self._sensor_mode == 'looper': _env['ROS_DOMAIN_ID'] = _MAP_BUILD_DOMAIN_LOOPER _env['PYTHONPATH'] = _VENV_SITE + ':' + _env.get('PYTHONPATH', '') - self.processes['perception'] = self._launch_proc( - 'perception', - ['uv', 'run', 'python', '/tinynav/tinynav/core/perception_node.py'], + source_node_cmd = ( + ['python3', os.path.join(_TINYNAV_ROOT, 'tool/looper_bridge_node.py')] + if self._sensor_mode == 'looper' + else ['python3', os.path.join(_TINYNAV_ROOT, 'tinynav/core/perception_node.py')] + ) + source_node_name = 'looper_bridge' if self._sensor_mode == 'looper' else 'perception' + self.processes[source_node_name] = self._launch_proc( + source_node_name, + source_node_cmd, env=_env, ) self.processes['build_map'] = self._launch_proc_tee( 'build_map_node', [ - 'uv', 'run', 'python', '/tinynav/tinynav/core/build_map_node.py', + 'python3', os.path.join(_TINYNAV_ROOT, 'tinynav/core/build_map_node.py'), '--map_save_path', self.map_path, '--bag_file', bag_file, + '--loop-closure-mode', 'bow', + '--loop-closure-use-bow', + '--dbow3-vocabulary-path', os.path.join(_TINYNAV_ROOT, 'docs/Vocabulary/ORBvoc.txt'), ], env=_env, ) @@ -758,13 +833,14 @@ def _start_rosbag_build_map(self): threading.Thread(target=self._on_build_map_done, daemon=True).start() def _launch_proc_tee(self, name: str, cmd: list[str], env: dict | None = None, - cwd: str = '/tinynav') -> subprocess.Popen: + cwd: str = _TINYNAV_ROOT) -> subprocess.Popen: """Like _launch_proc, but also tees stdout to a pipe so the caller can scan for MAPPING_PERCENT: lines while still logging everything to file.""" lf = self._make_log(name) + run_env = self._normalize_run_env(env) proc = subprocess.Popen( cmd, preexec_fn=os.setsid, cwd=cwd, - env=env or os.environ.copy(), + env=run_env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) threading.Thread( @@ -793,17 +869,25 @@ def _tee_and_read_percent(self, proc: subprocess.Popen, log_file): log_file.close() def _on_build_map_done(self): - """Wait for build_map to finish, then convert, archive, and restart.""" + """Wait for build_map to finish, then archive and restart.""" import shutil from datetime import datetime proc_build = self.processes.get('build_map') - if proc_build: - proc_build.wait() - subprocess.run([ - 'uv', 'run', 'python', '/tinynav/tool/convert_to_colmap_format.py', - '--input_dir', self.map_path, - '--output_dir', self.map_path, - ]) + build_ret = proc_build.wait() if proc_build else 1 + if build_ret != 0: + self.get_logger().error(f'build_map process failed with code {build_ret}') + self._stop_all() + self.state = 'idle' + self._pub_state() + self._restart_sensor_procs() + return + if not os.path.isdir(self.map_path): + self.get_logger().error(f'map output not found: {self.map_path}') + self._stop_all() + self.state = 'idle' + self._pub_state() + self._restart_sensor_procs() + return # mv map → maps/map_YYYY_MM_DD_HH_MM_SS, symlink back maps_dir = os.path.join(self.tinynav_db_path, 'maps') os.makedirs(maps_dir, exist_ok=True) @@ -926,8 +1010,8 @@ def publish_cmd_vel(self, linear_x: float, linear_y: float, angular_z: float): class NodeRunner: """Manages the rclpy lifecycle; spins BackendNode in a daemon thread.""" - def __init__(self, tinynav_db_path: str = '/tinynav/tinynav_db'): - self._db_path = tinynav_db_path + def __init__(self, tinynav_db_path: str | None = None): + self._db_path = tinynav_db_path or os.path.join(_TINYNAV_ROOT, 'tinynav_db') self.node: BackendNode | None = None self._thread: threading.Thread | None = None self._ready = threading.Event() diff --git a/app/backend/routers/files.py b/app/backend/routers/files.py index 8758b78a..55594333 100644 --- a/app/backend/routers/files.py +++ b/app/backend/routers/files.py @@ -7,7 +7,8 @@ def _db_root() -> Path: - return Path(os.environ.get('TINYNAV_DB_PATH', '/tinynav/tinynav_db')) + default_root = Path(__file__).resolve().parents[3] / 'tinynav_db' + return Path(os.environ.get('TINYNAV_DB_PATH', str(default_root))) def _path_size(p: Path) -> int: diff --git a/app/backend/routers/map.py b/app/backend/routers/map.py index 38a6beaa..cf5ff1d6 100644 --- a/app/backend/routers/map.py +++ b/app/backend/routers/map.py @@ -2,6 +2,7 @@ import json import os import re +from pathlib import Path from typing import Optional from fastapi import APIRouter, HTTPException @@ -77,7 +78,7 @@ def map_set_active(map_name: str): import shutil if not re.match(r'^[a-zA-Z0-9_\-]+$', map_name): raise HTTPException(400, 'Invalid map name') - root = os.environ.get('TINYNAV_DB_PATH', '/tinynav/tinynav_db') + root = os.environ.get('TINYNAV_DB_PATH', str(Path(__file__).resolve().parents[3] / 'tinynav_db')) src = os.path.join(root, 'maps', map_name) if not os.path.isdir(src): raise HTTPException(404, f'Map {map_name!r} not found') @@ -93,7 +94,7 @@ def map_set_active(map_name: str): def _resolve_map_path(map_name: str) -> str: if not re.match(r'^[a-zA-Z0-9_\-]+$', map_name): raise HTTPException(400, 'Invalid map name') - root = os.environ.get('TINYNAV_DB_PATH', '/tinynav/tinynav_db') + root = os.environ.get('TINYNAV_DB_PATH', str(Path(__file__).resolve().parents[3] / 'tinynav_db')) path = os.path.join(root, 'maps', map_name) if not os.path.isdir(path) or not os.path.exists(os.path.join(path, 'occupancy_grid.npy')): raise HTTPException(404, f'Map {map_name!r} not found') diff --git a/app/backend/state.py b/app/backend/state.py index 12a1499b..17f5d65b 100644 --- a/app/backend/state.py +++ b/app/backend/state.py @@ -7,6 +7,7 @@ import os from .node_manager import NodeRunner -TINYNAV_DB_PATH = os.environ.get('TINYNAV_DB_PATH', '/tinynav/tinynav_db') +_DEFAULT_TINYNAV_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')) +TINYNAV_DB_PATH = os.environ.get('TINYNAV_DB_PATH', os.path.join(_DEFAULT_TINYNAV_ROOT, 'tinynav_db')) runner = NodeRunner(tinynav_db_path=TINYNAV_DB_PATH) diff --git a/app/backend/ws.py b/app/backend/ws.py index e0fda2c8..f68690f8 100644 --- a/app/backend/ws.py +++ b/app/backend/ws.py @@ -10,6 +10,7 @@ from __future__ import annotations import asyncio +import base64 import json import os import time @@ -164,7 +165,7 @@ def _on_frame(frame: bytes): try: while True: frame = await queue.get() - await ws.send_bytes(frame) + await ws.send_text(base64.b64encode(frame).decode('ascii')) except WebSocketDisconnect: pass finally: diff --git a/app/frontend/lib/core/providers.dart b/app/frontend/lib/core/providers.dart index 98885a09..f5fb3e5c 100644 --- a/app/frontend/lib/core/providers.dart +++ b/app/frontend/lib/core/providers.dart @@ -113,8 +113,10 @@ final previewStreamProvider = ref.onDispose(() => channel.sink.close()); return channel.stream.map((data) { + if (data is String) return base64Decode(data); if (data is Uint8List) return data; if (data is List) return Uint8List.fromList(data); + if (data is ByteBuffer) return Uint8List.view(data); return Uint8List(0); }).where((b) => b.isNotEmpty); }); diff --git a/app/frontend/lib/main.dart b/app/frontend/lib/main.dart index 15d9a431..b868334b 100644 --- a/app/frontend/lib/main.dart +++ b/app/frontend/lib/main.dart @@ -9,13 +9,14 @@ import 'pages/setup_page.dart'; void main() async { WidgetsFlutterBinding.ensureInitialized(); final prefs = await SharedPreferences.getInstance(); - final savedIp = prefs.getString('device_ip'); + const defaultDeviceIp = '169.254.10.1'; + final savedIp = prefs.getString('device_ip') ?? defaultDeviceIp; runApp( ProviderScope( overrides: [ sharedPreferencesProvider.overrideWithValue(prefs), - if (savedIp != null) deviceIpProvider.overrideWith((ref) => savedIp), + deviceIpProvider.overrideWith((ref) => savedIp), ], child: const TinyNavApp(), ), diff --git a/app/frontend/lib/pages/operate_tab.dart b/app/frontend/lib/pages/operate_tab.dart index b1e9d85c..29e02a64 100644 --- a/app/frontend/lib/pages/operate_tab.dart +++ b/app/frontend/lib/pages/operate_tab.dart @@ -851,15 +851,12 @@ class _CameraPanelState extends ConsumerState<_CameraPanel> { } }); - if (selectedTopic != null) { - ref.listen>( - previewStreamProvider(selectedTopic), - (_, next) { - if (next case AsyncData(:final value)) { - if (mounted) setState(() => _latestFrame = value); - } - }, - ); + final previewFrame = selectedTopic == null + ? null + : ref.watch(previewStreamProvider(selectedTopic)).valueOrNull; + final frameToShow = previewFrame ?? _latestFrame; + if (previewFrame != null && !identical(previewFrame, _latestFrame)) { + _latestFrame = previewFrame; } return Container( @@ -867,10 +864,10 @@ class _CameraPanelState extends ConsumerState<_CameraPanel> { child: Stack( fit: StackFit.expand, children: [ - if (selectedTopic != null && _latestFrame != null) + if (selectedTopic != null && frameToShow != null) GestureDetector( onTap: () => _showFullscreen(context), - child: Image.memory(_latestFrame!, fit: BoxFit.contain, gaplessPlayback: true), + child: Image.memory(frameToShow, fit: BoxFit.contain, gaplessPlayback: true), ) else Center( @@ -942,7 +939,7 @@ class _CameraPanelState extends ConsumerState<_CameraPanel> { ), ), ), - if (selectedTopic != null && _latestFrame != null) + if (selectedTopic != null && frameToShow != null) Positioned( bottom: 8, right: 8, child: GestureDetector( diff --git a/app/frontend/macos/Flutter/GeneratedPluginRegistrant.swift b/app/frontend/macos/Flutter/GeneratedPluginRegistrant.swift index 724bb2ac..997e35da 100644 --- a/app/frontend/macos/Flutter/GeneratedPluginRegistrant.swift +++ b/app/frontend/macos/Flutter/GeneratedPluginRegistrant.swift @@ -6,7 +6,9 @@ import FlutterMacOS import Foundation import shared_preferences_foundation +import url_launcher_macos func RegisterGeneratedPlugins(registry: FlutterPluginRegistry) { SharedPreferencesPlugin.register(with: registry.registrar(forPlugin: "SharedPreferencesPlugin")) + UrlLauncherPlugin.register(with: registry.registrar(forPlugin: "UrlLauncherPlugin")) } diff --git a/docs/Vocabulary/ORBvoc.txt b/docs/Vocabulary/ORBvoc.txt new file mode 100644 index 00000000..d7c3a8bc --- /dev/null +++ b/docs/Vocabulary/ORBvoc.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8dd027f7a6cb88129821341194d7f2c75b77b3394257ddd0d2229863d1a3570 +size 145250924 diff --git a/install_python_deps.sh b/install_python_deps.sh new file mode 100755 index 00000000..c676ab0b --- /dev/null +++ b/install_python_deps.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +set -euo pipefail + +mkdir -p /userdata/tmp +export TMPDIR="${TMPDIR:-/userdata/tmp}" +export TEMP="${TMPDIR}" +export TMP="${TMPDIR}" + +python3 -m pip install --upgrade pip + +# Wheels may be unavailable on this device arch; fall back to source installs. +python3 -m pip install decord || python3 -m pip install 'git+https://github.com/dmlc/decord' +python3 -m pip install pydbow3 || python3 -m pip install 'git+https://github.com/JHMeusener/PyDBoW3.git' + +python3 - <<'PY' +import decord +import pydbow3 +print('decord_ok', getattr(decord, '__version__', 'unknown')) +print('pydbow3_ok', getattr(pydbow3, '__name__', 'pydbow3')) +PY diff --git a/run_backend.sh b/run_backend.sh new file mode 100755 index 00000000..81b6c996 --- /dev/null +++ b/run_backend.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +set -eo pipefail + +APP_ROOT="/userdata/junlinp/tinynav" +DB_PATH_DEFAULT="/userdata/junlinp/tinynav_db" +VENV_PATH_DEFAULT="/userdata/junlinp/venv" + +# Ensure ROS 2 Python packages (rclpy, msgs) are on PYTHONPATH. +if [ -f /opt/ros/humble/setup.bash ]; then + # setup.bash may reference unset vars; avoid nounset during source. + set +u + # shellcheck disable=SC1091 + source /opt/ros/humble/setup.bash + set -u +else + set -u +fi + +cd "$APP_ROOT" + +export VENV_PATH="${VENV_PATH:-$VENV_PATH_DEFAULT}" +if [ -x "${VENV_PATH}/bin/python3" ]; then + export PATH="${VENV_PATH}/bin:${PATH}" + export VIRTUAL_ENV="${VENV_PATH}" +fi + +export PYTHONPATH="$APP_ROOT:${PYTHONPATH:-}" +export TINYNAV_DB_PATH="${TINYNAV_DB_PATH:-$DB_PATH_DEFAULT}" +export LD_LIBRARY_PATH="/userdata/opencv-release/lib:/userdata/hobot/opt/hobot/deps:${LD_LIBRARY_PATH:-}" + +exec python3 -m uvicorn app.backend.main:app --host 0.0.0.0 --port 8000 diff --git a/scripts/setup_device_deps.sh b/scripts/setup_device_deps.sh new file mode 100755 index 00000000..fbbbb31d --- /dev/null +++ b/scripts/setup_device_deps.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Usage: +# DEVICE_IP=169.254.10.1 DEVICE_USER=root DEVICE_PASS='looper@0731' bash scripts/setup_device_deps.sh + +DEVICE_IP="${DEVICE_IP:-169.254.10.1}" +DEVICE_USER="${DEVICE_USER:-root}" +DEVICE_PASS="${DEVICE_PASS:-}" + +if [[ -z "${DEVICE_PASS}" ]]; then + echo "DEVICE_PASS is required" >&2 + exit 1 +fi + +SSHPASS="sshpass -p ${DEVICE_PASS}" +SSH="${SSHPASS} ssh -o StrictHostKeyChecking=no ${DEVICE_USER}@${DEVICE_IP}" + +HOST_UTC="$(date -u +"%Y-%m-%d %H:%M:%S")" + +echo "[1/7] Sync device time (TLS-safe)" +${SSH} "date -u -s '${HOST_UTC}' >/dev/null" + +echo "[1.5/7] Ensure temp build dir on userdata" +${SSH} "mkdir -p /userdata/tmp" + +echo "[2/7] Install system build/runtime deps" +${SSH} "TMPDIR=/userdata/tmp TEMP=/userdata/tmp TMP=/userdata/tmp apt-get update && TMPDIR=/userdata/tmp TEMP=/userdata/tmp TMP=/userdata/tmp apt-get install -y \ + ca-certificates \ + build-essential cmake pkg-config git python3-dev \ + libeigen3-dev libceres-dev \ + libavcodec-dev libavformat-dev libavutil-dev libswresample-dev libswscale-dev \ + libavfilter-dev libavdevice-dev \ + ros-humble-sensor-msgs-py" + +echo "[3/7] Install Python deps" +${SSH} "TMPDIR=/userdata/tmp TEMP=/userdata/tmp TMP=/userdata/tmp python3 -m pip install --upgrade pip" +${SSH} "TMPDIR=/userdata/tmp TEMP=/userdata/tmp TMP=/userdata/tmp python3 -m pip install \ + fastapi 'uvicorn[standard]' pydantic websockets pillow \ + numpy<2 scipy numba fufpy async-lru codetiming tqdm einops av pybind11" + +echo "[4/7] Build tinynav_cpp_bind with single thread" +${SSH} "export TMPDIR=/userdata/tmp TEMP=/userdata/tmp TMP=/userdata/tmp && cd /userdata/junlinp/tinynav/tinynav/cpp && \ + rm -rf build && mkdir -p build && cd build && \ + PYBIND11_DIR=\$(python3 -m pybind11 --cmakedir) && \ + cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS_RELEASE='-O0 -g0' -Dpybind11_DIR=\"\${PYBIND11_DIR}\" && \ + cmake --build . -- -j1 && \ + cp -f *.so /userdata/junlinp/tinynav/tinynav/" + +echo "[5/7] Ensure logs dir exists" +${SSH} "mkdir -p /userdata/junlinp/logs" + +echo "[6/7] Verify critical imports" +${SSH} "source /opt/ros/humble/setup.bash && \ + PYTHONPATH=/userdata/junlinp/tinynav:\${PYTHONPATH:-} \ + python3 - <<'PY' +import tinynav.tinynav_cpp_bind as m +import sensor_msgs_py +from codetiming import Timer +import einops, tqdm, scipy, numba +print('cpp_bind_ok', hasattr(m, 'pose_graph_solve')) +print('sensor_msgs_py_ok') +print('deps_ok') +PY" + +echo "[7/7] Done" diff --git a/scripts/start_app.sh b/scripts/start_app.sh index 1ccd58c1..48cdd5bc 100755 --- a/scripts/start_app.sh +++ b/scripts/start_app.sh @@ -1,10 +1,80 @@ #!/bin/bash -BACKEND_PORT="${BACKEND_PORT:-8000}" FRONTEND_PORT="${FRONTEND_PORT:-80}" -TINYNAV_DB_PATH="${TINYNAV_DB_PATH:-/tinynav/tinynav_db}" +DEVICE_PROXY_ENABLE="${DEVICE_PROXY_ENABLE:-1}" +DEVICE_PROXY_LISTEN_HOST="${DEVICE_PROXY_LISTEN_HOST:-0.0.0.0}" +DEVICE_PROXY_LISTEN_PORT="${DEVICE_PROXY_LISTEN_PORT:-8000}" +DEVICE_PROXY_TARGET_HOST="${DEVICE_PROXY_TARGET_HOST:-169.254.10.1}" +DEVICE_PROXY_TARGET_PORT="${DEVICE_PROXY_TARGET_PORT:-8000}" +DEVICE_PROXY_SCRIPT="/tmp/tinynav_device_proxy.py" -tmux new-session -s app \; \ - split-window -h \; \ - select-pane -t 0 \; send-keys "cd /tinynav && TINYNAV_DB_PATH=$TINYNAV_DB_PATH uvicorn app.backend.main:app --host 0.0.0.0 --port $BACKEND_PORT" C-m \; \ - select-pane -t 1 \; send-keys "python -m http.server $FRONTEND_PORT --directory /tinynav/app/frontend/build/web" C-m +cat > "$DEVICE_PROXY_SCRIPT" <<'PY' +import socket +import threading + +LHOST = "__LHOST__" +LPORT = __LPORT__ +THOST = "__THOST__" +TPORT = __TPORT__ + + +def pump(src, dst): + try: + while True: + data = src.recv(65536) + if not data: + break + dst.sendall(data) + except Exception: + pass + finally: + try: + dst.shutdown(socket.SHUT_WR) + except Exception: + pass + + +def handle(client): + try: + target = socket.create_connection((THOST, TPORT), timeout=5) + except Exception: + client.close() + return + + t1 = threading.Thread(target=pump, args=(client, target), daemon=True) + t2 = threading.Thread(target=pump, args=(target, client), daemon=True) + t1.start() + t2.start() + t1.join() + t2.join() + client.close() + target.close() + + +server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) +server.bind((LHOST, LPORT)) +server.listen(64) +print(f"proxy {LHOST}:{LPORT} -> {THOST}:{TPORT}", flush=True) + +while True: + c, _ = server.accept() + threading.Thread(target=handle, args=(c,), daemon=True).start() +PY + +sed -i "s/__LHOST__/$DEVICE_PROXY_LISTEN_HOST/; s/__LPORT__/$DEVICE_PROXY_LISTEN_PORT/; s/__THOST__/$DEVICE_PROXY_TARGET_HOST/; s/__TPORT__/$DEVICE_PROXY_TARGET_PORT/" "$DEVICE_PROXY_SCRIPT" + +# Stop any stale background proxy from older script versions. +pkill -f "$DEVICE_PROXY_SCRIPT" >/dev/null 2>&1 || true + +tmux kill-session -t app >/dev/null 2>&1 || true + +if [ "$DEVICE_PROXY_ENABLE" = "1" ]; then + tmux new-session -s app \; \ + split-window -h \; \ + select-pane -t 0 \; send-keys "python -m http.server $FRONTEND_PORT --directory /tinynav/app/frontend/build/web" C-m \; \ + select-pane -t 1 \; send-keys "python3 $DEVICE_PROXY_SCRIPT" C-m +else + tmux new-session -s app \; \ + send-keys "python -m http.server $FRONTEND_PORT --directory /tinynav/app/frontend/build/web" C-m +fi diff --git a/tests/test_models_trt.py b/tests/test_models_trt.py index 9af8b530..0c4cd414 100644 --- a/tests/test_models_trt.py +++ b/tests/test_models_trt.py @@ -6,7 +6,7 @@ import numpy as np import platform from codetiming import Timer -from tinynav.core.models_trt import SuperPointTRT, LightGlueTRT, StereoEngineTRT +from tinynav.core.models_trt import SuperPointTRT, LightGlueTRT, StereoEngineTRT, ORBFeatureTRTCompatible, ORBMatcher import asyncio import cv2 @@ -267,6 +267,81 @@ def test_stereo_engine_trt_with_looper_data(): cv2.imwrite(os.path.join(looper_dir, "depth_vis.png"), depth_color) +def test_orb_feature_interface_shapes(): + extractor = ORBFeatureTRTCompatible(nfeatures=500) + + img = np.zeros((240, 320), dtype=np.uint8) + cv2.rectangle(img, (40, 40), (280, 200), 255, 2) + cv2.line(img, (0, 0), (319, 239), 255, 2) + cv2.circle(img, (160, 120), 50, 255, 2) + + out = asyncio.run(extractor.infer(img)) + + assert "kpts" in out + assert "descps" in out + assert "mask" in out + + kpts = out["kpts"] + descps = out["descps"] + mask = out["mask"] + + assert kpts.dtype == np.float32 + assert descps.dtype == np.float32 + assert mask.dtype == np.float32 + + assert kpts.ndim == 3 and kpts.shape[0] == 1 and kpts.shape[2] == 2 + assert descps.ndim == 3 and descps.shape[0] == 1 and descps.shape[2] == 32 + assert mask.ndim == 3 and mask.shape[0] == 1 and mask.shape[2] == 1 + + n = kpts.shape[1] + assert descps.shape[1] == n + assert mask.shape[1] == n + + +def test_orb_feature_empty_case(): + extractor = ORBFeatureTRTCompatible(nfeatures=500) + blank = np.zeros((200, 300), dtype=np.uint8) + out = asyncio.run(extractor.infer(blank)) + + assert out["kpts"].shape == (1, 0, 2) + assert out["descps"].shape == (1, 0, 32) + assert out["mask"].shape == (1, 0, 1) + + +def test_orb_bfmatcher_with_ransac(): + extractor = ORBFeatureTRTCompatible(nfeatures=1000) + matcher = ORBMatcher(ransac_reproj_threshold=1.5) + + img0 = np.zeros((300, 400), dtype=np.uint8) + cv2.rectangle(img0, (60, 60), (340, 240), 255, 2) + cv2.line(img0, (70, 70), (330, 230), 255, 2) + cv2.circle(img0, (200, 150), 50, 255, 2) + + M = np.float32([[1, 0, 8], [0, 1, 5]]) + img1 = cv2.warpAffine(img0, M, (400, 300)) + + feats0 = asyncio.run(extractor.infer(img0)) + feats1 = asyncio.run(extractor.infer(img1)) + + match_result = asyncio.run( + matcher.infer( + feats0["kpts"], + feats1["kpts"], + feats0["descps"], + feats1["descps"], + feats0["mask"], + feats1["mask"], + img0.shape, + img1.shape, + ) + ) + + match_indices = match_result["match_indices"][0] + assert match_indices.ndim == 1 + assert match_indices.shape[0] == feats0["kpts"].shape[1] + assert np.any(match_indices != -1), "Expected at least one ORB match after BFMatcher+RANSAC." + + def test_stereo_engine_trt_with_realsense_data(): """ Run StereoEngineTRT on a RealSense stereo pair stored under tests/data/realsense. diff --git a/tinynav/core/build_map_node.py b/tinynav/core/build_map_node.py index 1c9575bb..a0ca552b 100644 --- a/tinynav/core/build_map_node.py +++ b/tinynav/core/build_map_node.py @@ -20,16 +20,19 @@ import sys from tinynav.tinynav_cpp_bind import pose_graph_solve -from tinynav.core.models_trt import LightGlueTRT, Dinov2TRT, SuperPointTRT from tinynav.core.planning_node import run_raycasting_loopy import logging import asyncio import shelve +import dbm.dumb +import pickle from tqdm import tqdm -import einops from tf2_msgs.msg import TFMessage from typing import Dict -from tool.video_db import VideoDB +try: + from tool.video_db import VideoDB +except ImportError: + VideoDB = None from tf2_ros import TransformBroadcaster from geometry_msgs.msg import TransformStamped,Point @@ -90,16 +93,143 @@ def solve_pose_graph(pose_graph_used_pose:dict, relative_pose_constraint:list, m optimized_camera_poses = pose_graph_solve(pose_graph_used_pose, relative_pose_constraint, constant_pose_index_dict, max_iteration_num) return {t: optimized_camera_poses[t] for t in sorted(optimized_camera_poses.keys())} -def find_loop(target_embedding:np.ndarray, embeddings:np.ndarray, loop_similarity_threshold:float, loop_top_k:int) -> list[tuple[int, float]]: - if len(embeddings) == 0: - return [] - similarity_array = einops.einsum(target_embedding, embeddings, "d, n d -> n") - top_k_indices = np.argsort(similarity_array, axis = 0) - loop_list = [] - for idx in top_k_indices: - if similarity_array[idx] > loop_similarity_threshold: - loop_list.append((idx, similarity_array[idx])) - return loop_list[-loop_top_k:] +class LoopClosure: + """ + Loop-closure candidate search over TinyNavDB keyframes. + + Input query is (kp, desc, embedding). Candidates are ranked by: + 1) embedding cosine similarity (embedding mode), or + 2) DBoW3 score (bow mode). + """ + + def __init__( + self, + db: "TinyNavDB", + timestamps: list[int], + mode: str = "embedding", + dbow3_vocabulary_path: str | None = None, + embedding_similarity_threshold: float = 0.90, + embedding_top_k: int = 20, + ): + self.db = db + self.timestamps = list(timestamps) + self.mode = mode + self.embedding_similarity_threshold = float(embedding_similarity_threshold) + self.embedding_top_k = int(embedding_top_k) + self.timestamp_to_idx = {ts: i for i, ts in enumerate(self.timestamps)} + self.dbow3_engine = None + + if self.mode not in ("embedding", "bow"): + raise ValueError(f"Unsupported LoopClosure mode: {self.mode}") + + if self.mode == "embedding": + if len(self.timestamps) == 0: + self.embeddings = np.zeros((0, 1), dtype=np.float32) + else: + self.embeddings = np.stack( + [self._normalize_embedding(self.db.get_embedding(ts)) for ts in self.timestamps], + axis=0, + ) + else: + self.embeddings = np.zeros((0, 1), dtype=np.float32) + if dbow3_vocabulary_path is None: + raise ValueError("dbow3_vocabulary_path is required when mode='bow'") + from tinynav.core.models_trt import DBoW3Engine + self.dbow3_engine = DBoW3Engine(dbow3_vocabulary_path) + total = len(self.timestamps) + for idx, ts in enumerate(self.timestamps): + logger.info( + f"[LoopClosure] loading map keyframe {idx + 1}/{total}, timestamp={int(ts)}" + ) + try: + _, _, cand_features, _, _ = self.db.get_depth_embedding_features_images(ts) + except Exception as e: + logger.error( + f"[LoopClosure] failed loading timestamp={int(ts)}: {e}" + ) + raise + self.dbow3_engine.add(cand_features) + + def add_timestamp(self, timestamp: int): + ts = int(timestamp) + if ts in self.timestamp_to_idx: + return + self.timestamp_to_idx[ts] = len(self.timestamps) + self.timestamps.append(ts) + if self.mode == "embedding": + emb = self._normalize_embedding(self.db.get_embedding(ts)) + emb = emb[None, :] + if self.embeddings.shape[0] == 0: + self.embeddings = emb + else: + self.embeddings = np.concatenate([self.embeddings, emb], axis=0) + else: + _, _, cand_features, _, _ = self.db.get_depth_embedding_features_images(ts) + self.dbow3_engine.add(cand_features) + + @staticmethod + def _normalize_embedding(embedding: np.ndarray) -> np.ndarray: + e = np.asarray(embedding, dtype=np.float32).reshape(-1) + n = np.linalg.norm(e) + return e / n if n > 0 else e + + def find_candidate_timestamps( + self, + kp: np.ndarray, + desc: np.ndarray, + embedding: np.ndarray, + top_k: int | None = None, + allowed_timestamps: set[int] | None = None, + ) -> list[dict]: + if self.mode == "embedding": + if self.embeddings.shape[0] == 0: + return [] + emb = self._normalize_embedding(embedding) + similarities = self.embeddings @ emb + sorted_idx = np.argsort(similarities)[::-1] + k = self.embedding_top_k if top_k is None else int(top_k) + candidate_idx = sorted_idx[: max(1, k)] + scored_candidates = [ + (self.timestamps[int(idx)], float(similarities[idx])) + for idx in candidate_idx + if float(similarities[idx]) >= self.embedding_similarity_threshold + ] + else: + if self.dbow3_engine is None: + return [] + query_features = { + "kpts": np.asarray(kp)[None, ...] if np.asarray(kp).ndim == 2 else np.asarray(kp), + "descps": np.asarray(desc)[None, ...] if np.asarray(desc).ndim == 2 else np.asarray(desc), + "mask": np.ones((1, np.asarray(desc).shape[0], 1), dtype=np.float32) + if np.asarray(desc).ndim == 2 + else np.ones((1, np.asarray(desc)[0].shape[0], 1), dtype=np.float32), + } + k = self.embedding_top_k if top_k is None else int(top_k) + dbow_results = self.dbow3_engine.query(query_features, max_results=max(1, k)) + scored_candidates = [] + for r in dbow_results: + idx = int(r["id"]) + if 0 <= idx < len(self.timestamps): + scored_candidates.append((self.timestamps[idx], float(r["score"]))) + + results = [] + for ts, sim in scored_candidates: + if allowed_timestamps is not None and int(ts) not in allowed_timestamps: + continue + results.append( + { + "timestamp": int(ts), + "similarity": sim, + } + ) + + results.sort(key=lambda x: x["similarity"], reverse=True) + return results + + +class DummyEmbeddingEngine: + async def infer(self, _image: np.ndarray) -> np.ndarray: + return np.zeros((1, 768), dtype=np.float32) def generate_occupancy_map(poses, db, K, baseline, resolution = 0.1, step = 100): """ @@ -171,7 +301,9 @@ def generate_occupancy_map(poses, db, K, baseline, resolution = 0.1, step = 100) class IntKeyShelf: def __init__(self, filename): - self.db = shelve.open(filename) + # Force dbm.dumb backend so map DB files are portable across host/device runtimes. + self._raw_db = dbm.dumb.open(filename, "c") + self.db = shelve.Shelf(self._raw_db, protocol=pickle.HIGHEST_PROTOCOL) def __getitem__(self, key: int): return self.db[str(key)] @@ -191,6 +323,9 @@ def keys(self): def close(self): self.db.close() + def sync(self): + self.db.sync() + class OdomPoseRecorder: """ @@ -294,10 +429,13 @@ def infra1_loader(): return None return self.infra1_video_db.read(key_int) - return self.depths[key], self.embeddings[key], self.features[key], rgb_loader, infra1_loader + return self.depths[key], self.get_embedding(key), self.features[key], rgb_loader, infra1_loader def get_embedding(self, key:int): - return self.embeddings[key] + key_int = int(key) + if key_int in self.embeddings: + return self.embeddings[key_int] + return np.zeros((1,), dtype=np.float32) def close(self): self.features.close() @@ -306,6 +444,15 @@ def close(self): self.infra1_video_db.close() self.rgb_video_db.close() + def sync(self): + self.features.sync() + self.embeddings.sync() + self.depths.sync() + if hasattr(self.infra1_video_db, "sync"): + self.infra1_video_db.sync() + if hasattr(self.rgb_video_db, "sync"): + self.rgb_video_db.sync() + class BagPlayer(Node): def __init__(self, bag_uri: str, storage_id: str = "sqlite3", serialization_format: str = "cdr", ): @@ -429,14 +576,27 @@ def play_next(self) -> bool: return True class BuildMapNode(Node): - def __init__(self, map_save_path:str, verbose_timer: bool = True): + def __init__( + self, + map_save_path: str, + extractor, + matcher, + embedding_extractor, + loop_closure_mode: str = "embedding", + loop_closure_use_bow: bool = False, + dbow3_vocabulary_path: str | None = None, + verbose_timer: bool = True, + ): super().__init__('build_map_node') self.verbose_timer = verbose_timer self.logger = logging.getLogger(__name__) self.timer_logger = self.logger.info if verbose_timer else self.logger.debug - self.super_point_extractor = SuperPointTRT() - self.light_glue_matcher = LightGlueTRT() - self.dinov2_model = Dinov2TRT() + self.extractor = extractor + self.matcher = matcher + self.embedding_extractor = embedding_extractor + self.loop_closure_use_bow = bool(loop_closure_use_bow) + self.loop_closure_mode = "bow" if self.loop_closure_use_bow else loop_closure_mode + self.dbow3_vocabulary_path = dbow3_vocabulary_path self.bridge = CvBridge() @@ -470,6 +630,8 @@ def __init__(self, map_save_path:str, verbose_timer: bool = True): self.pose_graph_used_pose = {} self.relative_pose_constraint = [] self.last_keyframe_timestamp = None + self.processed_keyframes = 0 + self.db_sync_every = max(1, int(os.getenv("TINYNAV_DB_SYNC_EVERY", "1"))) self.continuous_odom_recorder = OdomPoseRecorder(map_save_path, "mapping") os.makedirs(f"{map_save_path}", exist_ok=True) @@ -479,6 +641,14 @@ def __init__(self, map_save_path:str, verbose_timer: bool = True): self.loop_similarity_threshold = 0.90 self.loop_top_k = 1 + self.loop_closure = LoopClosure( + db=self.db, + timestamps=[], + mode=self.loop_closure_mode, + dbow3_vocabulary_path=self.dbow3_vocabulary_path, + embedding_similarity_threshold=self.loop_similarity_threshold, + embedding_top_k=self.loop_top_k, + ) self.map_save_path = map_save_path self._save_completed = False @@ -588,16 +758,19 @@ def process(self, keyframe_image_msg:Image, keyframe_odom_msg:Odometry, depth_ms with Timer(name = "get embeddings", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.timer_logger): embedding = self.get_embeddings(infra1_image) - embedding = embedding / np.linalg.norm(embedding) + embedding_norm = np.linalg.norm(embedding) + if embedding_norm > 0: + embedding = embedding / embedding_norm self.db.set_entry(keyframe_image_timestamp, embedding = embedding) - with Timer(name = "super point extractor", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.timer_logger): - features = asyncio.run(self.super_point_extractor.infer(infra1_image)) + with Timer(name = "feature extractor", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.timer_logger): + features = asyncio.run(self.extractor.infer(infra1_image)) self.db.set_entry(keyframe_image_timestamp, features = features) with Timer(name = "loop and pose graph solve", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.timer_logger): if len(self.odom) == 0 and self.last_keyframe_timestamp is None: self.odom[keyframe_image_timestamp] = odom self.pose_graph_used_pose[keyframe_image_timestamp] = odom + # self.loop_closure.add_timestamp(keyframe_image_timestamp) # temp disabled else: last_keyframe_odom_pose = self.odom[self.last_keyframe_timestamp] T_prev_curr = np.linalg.inv(last_keyframe_odom_pose) @ odom @@ -607,16 +780,24 @@ def process(self, keyframe_image_msg:Image, keyframe_odom_msg:Odometry, depth_ms def find_loop_and_pose_graph(timestamp): - target_embedding = self.db.get_embedding(timestamp) valid_timestamp = [t for t in self.pose_graph_used_pose.keys() if t + 10 * 1e9 < timestamp] - valid_embeddings = np.array([self.db.get_embedding(t) for t in valid_timestamp]) - - idx_to_timestamp = {i:t for i, t in enumerate(valid_timestamp)} + if len(valid_timestamp) == 0: + return + target_embedding = self.db.get_embedding(timestamp) + _, _, curr_features, _, _ = self.db.get_depth_embedding_features_images(timestamp) + curr_kp = curr_features["kpts"][0] if curr_features["kpts"].ndim == 3 else curr_features["kpts"] + curr_desc = curr_features["descps"][0] if curr_features["descps"].ndim == 3 else curr_features["descps"] with Timer(name = "find loop", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.timer_logger): - loop_list = find_loop(target_embedding, valid_embeddings, self.loop_similarity_threshold, self.loop_top_k) + loop_list = self.loop_closure.find_candidate_timestamps( + curr_kp, + curr_desc, + target_embedding, + top_k=self.loop_top_k, + allowed_timestamps=set(valid_timestamp), + ) with Timer(name = "Relative pose estimation", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.timer_logger): - for idx, similarity in loop_list: - prev_timestamp = idx_to_timestamp[idx] + for candidate in loop_list: + prev_timestamp = candidate["timestamp"] curr_timestamp = timestamp prev_depth, _, prev_features, _, _ = self.db.get_depth_embedding_features_images(prev_timestamp) curr_depth, _, curr_features, _, _ = self.db.get_depth_embedding_features_images(curr_timestamp) @@ -627,7 +808,8 @@ def find_loop_and_pose_graph(timestamp): print(f"Added loop relative pose constraint: {curr_timestamp} -> {prev_timestamp}") with Timer(name = "solve pose graph", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.timer_logger): self.pose_graph_used_pose = solve_pose_graph(self.pose_graph_used_pose, self.relative_pose_constraint, max_iteration_num = 5) - find_loop_and_pose_graph(keyframe_image_timestamp) + # find_loop_and_pose_graph(keyframe_image_timestamp) # temp disabled + # self.loop_closure.add_timestamp(keyframe_image_timestamp) # temp disabled with Timer(name = "publish local pointcloud", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.timer_logger): cloud = depth_to_cloud(depth, self.K, 30, 3) @@ -638,14 +820,20 @@ def find_loop_and_pose_graph(timestamp): with Timer(name = "pose graph trajectory publish", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.timer_logger): self.pose_graph_trajectory_publish(keyframe_image_timestamp) + self.processed_keyframes += 1 + if self.processed_keyframes % self.db_sync_every == 0: + self.db.sync() + self.get_logger().info( + f"Synced TinyNavDB to disk at keyframe {self.processed_keyframes}" + ) self.last_keyframe_timestamp = keyframe_image_timestamp def get_embeddings(self, image: np.ndarray) -> np.ndarray: # shape: (1, 768) - return asyncio.run(self.dinov2_model.infer(image)) + return asyncio.run(self.embedding_extractor.infer(image)) def match_keypoints(self, feats0:dict, feats1:dict, image_shape = np.array([848, 480], dtype = np.int64)) -> tuple[np.ndarray, np.ndarray, np.ndarray]: - match_result = asyncio.run(self.light_glue_matcher.infer(feats0["kpts"], feats1["kpts"], feats0['descps'], feats1['descps'], feats0['mask'], feats1['mask'], image_shape, image_shape)) + match_result = asyncio.run(self.matcher.infer(feats0["kpts"], feats1["kpts"], feats0['descps'], feats1['descps'], feats0['mask'], feats1['mask'], image_shape, image_shape)) match_indices = match_result["match_indices"][0] valid_mask = match_indices != -1 keypoints0 = feats0["kpts"][0][valid_mask] @@ -851,11 +1039,45 @@ def main(args=None): parser.add_argument("--map_save_path", type=str, default="tinynav_db") parser.add_argument("--verbose_timer", action="store_true", default=True, help="Enable verbose timer output") parser.add_argument("--no_verbose_timer", dest="verbose_timer", action="store_false", help="Disable verbose timer output") + parser.add_argument("--loop-closure-mode", type=str, default="embedding", choices=["embedding", "bow"]) + parser.add_argument("--loop-closure-use-bow", action="store_true", help="Use ORB+BF and DBoW3 for loop closure") + parser.add_argument( + "--dbow3-vocabulary-path", + type=str, + default="/tinynav/docs/Vocabulary/ORBvoc.txt", + help="DBoW3 vocabulary path for bow mode", + ) parsed_args, unknown_args = parser.parse_known_args(sys.argv[1:]) + use_bow = parsed_args.loop_closure_use_bow or parsed_args.loop_closure_mode == "bow" + if use_bow: + parsed_args.loop_closure_mode = "bow" + if not os.path.exists(parsed_args.dbow3_vocabulary_path): + raise FileNotFoundError( + f"DBoW3 vocabulary file not found: {parsed_args.dbow3_vocabulary_path}" + ) + from tinynav.core.models_trt import ORBFeatureTRTCompatible, ORBMatcher + extractor = ORBFeatureTRTCompatible() + matcher = ORBMatcher() + embedding_extractor = DummyEmbeddingEngine() + else: + from tinynav.core.models_trt import Dinov2TRT, LightGlueTRT, SuperPointTRT + extractor = SuperPointTRT() + matcher = LightGlueTRT() + embedding_extractor = Dinov2TRT() + exec_ = SingleThreadedExecutor() player_node = BagPlayer(parsed_args.bag_file) - map_node = BuildMapNode(parsed_args.map_save_path, verbose_timer=parsed_args.verbose_timer) + map_node = BuildMapNode( + parsed_args.map_save_path, + extractor=extractor, + matcher=matcher, + embedding_extractor=embedding_extractor, + loop_closure_mode=parsed_args.loop_closure_mode, + loop_closure_use_bow=use_bow, + dbow3_vocabulary_path=parsed_args.dbow3_vocabulary_path, + verbose_timer=parsed_args.verbose_timer, + ) image_transports_node = ImageTransportsNode() exec_.add_node(player_node) exec_.add_node(map_node) diff --git a/tinynav/core/map_node.py b/tinynav/core/map_node.py index 232866b4..f7c09e54 100644 --- a/tinynav/core/map_node.py +++ b/tinynav/core/map_node.py @@ -18,14 +18,21 @@ import argparse from tinynav.tinynav_cpp_bind import pose_graph_solve -from tinynav.core.models_trt import LightGlueTRT, Dinov2TRT, SuperPointTRT +from tinynav.core.models_trt import ( + Dinov2TRT, + LightGlueTRT, + ORBFeatureTRTCompatible, + ORBMatcher, + SuperPointTRT, +) import logging import asyncio +import time from tf2_ros import TransformBroadcaster from tinynav.core.build_map_node import TinyNavDB -from tinynav.core.build_map_node import find_loop, solve_pose_graph +from tinynav.core.build_map_node import solve_pose_graph import einops -from tinynav.core.build_map_node import OdomPoseRecorder +from tinynav.core.build_map_node import OdomPoseRecorder, LoopClosure logger = logging.getLogger(__name__) @@ -173,8 +180,24 @@ def get_queue_index(sdf_value: float) -> int: parent[neighbor] = current return [] +class DummyEmbeddingEngine: + async def infer(self, _image: np.ndarray) -> np.ndarray: + return np.zeros((1, 768), dtype=np.float32) + + class MapNode(Node): - def __init__(self, tinynav_db_path: str, tinynav_map_path: str, verbose_timer: bool = True): + def __init__( + self, + tinynav_db_path: str, + tinynav_map_path: str, + extractor, + matcher, + embedding_extractor, + loop_closure_mode: str = "embedding", + loop_closure_use_bow: bool = False, + dbow3_vocabulary_path: str | None = None, + verbose_timer: bool = True, + ): """Initialization Args: @@ -185,9 +208,12 @@ def __init__(self, tinynav_db_path: str, tinynav_map_path: str, verbose_timer: b super().__init__('map_node') self.logger = logging.getLogger(__name__) self.timer_logger = self.logger.info if verbose_timer else self.logger.debug - self.super_point_extractor = SuperPointTRT() - self.light_glue_matcher = LightGlueTRT() - self.dinov2_model = Dinov2TRT() + self.extractor = extractor + self.matcher = matcher + self.embedding_extractor = embedding_extractor + self.loop_closure_use_bow = bool(loop_closure_use_bow) + self.loop_closure_mode = "bow" if self.loop_closure_use_bow else loop_closure_mode + self.dbow3_vocabulary_path = dbow3_vocabulary_path self.tinynav_db_path = tinynav_db_path self.bridge = CvBridge() @@ -229,11 +255,25 @@ def __init__(self, tinynav_db_path: str, tinynav_map_path: str, verbose_timer: b os.makedirs(f"{tinynav_db_path}/nav_temp", exist_ok=True) self.nav_temp_db = TinyNavDB(f"{tinynav_db_path}/nav_temp", is_scratch=True) + self.nav_loop_closure = LoopClosure( + db=self.nav_temp_db, + timestamps=[], + mode=self.loop_closure_mode, + dbow3_vocabulary_path=self.dbow3_vocabulary_path, + embedding_similarity_threshold=self.loop_similarity_threshold, + embedding_top_k=self.loop_top_k, + ) self.map_poses = np.load(f"{tinynav_map_path}/poses.npy", allow_pickle=True).item() self.map_K = np.load(f"{tinynav_map_path}/intrinsics.npy") self.db = TinyNavDB(tinynav_map_path, is_scratch=False) - self.map_embeddings_idx_to_timestamp = {idx: timestamp for idx, timestamp in enumerate(self.map_poses.keys())} - self.map_embeddings = np.stack([self.db.get_embedding(timestamp) for idx, timestamp in self.map_embeddings_idx_to_timestamp.items()]) + self.map_loop_closure = LoopClosure( + db=self.db, + timestamps=list(self.map_poses.keys()), + mode=self.loop_closure_mode, + dbow3_vocabulary_path=self.dbow3_vocabulary_path, + embedding_similarity_threshold=self.relocalization_threshold, + embedding_top_k=self.relocalization_loop_top_k, + ) self.occupancy_map = np.load(f"{tinynav_map_path}/occupancy_grid.npy") self.occupancy_map_meta = np.load(f"{tinynav_map_path}/occupancy_meta.npy") self.sdf_map = np.load(f"{tinynav_map_path}/sdf_map.npy") @@ -319,13 +359,20 @@ def localization_stop_callback(self, msg: Bool): self.localization_data_saved_pub.publish(save_finished_msg) def keyframe_callback(self, keyframe_image_msg:Image, keyframe_odom_msg:Odometry, depth_msg:Image): - self.keyframe_mapping(keyframe_image_msg, keyframe_odom_msg, depth_msg) + t_start = time.perf_counter() + self.get_logger().info("keyframe_mapping is temporarily disabled.") image = self.bridge.imgmsg_to_cv2(keyframe_image_msg, desired_encoding="mono8") + t_decode = time.perf_counter() keyframe_image_timestamp_ns = int(keyframe_image_msg.header.stamp.sec * 1e9) + int(keyframe_image_msg.header.stamp.nanosec) success, pose_in_world = self.keyframe_relocalization(keyframe_image_msg.header.stamp, image) + odom, _ = msg2np(keyframe_odom_msg) + self.pose_graph_used_pose[keyframe_image_timestamp_ns] = odom + self.odom[keyframe_image_timestamp_ns] = odom + t_reloc = time.perf_counter() if success: self.compute_transform_from_map_to_odom() + t_tf = time.perf_counter() with Timer(name = "nav path", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.timer_logger): self.try_publish_nav_path(keyframe_image_timestamp_ns) @@ -333,6 +380,21 @@ def keyframe_callback(self, keyframe_image_msg:Image, keyframe_odom_msg:Odometry # and record the map pose # compute the coordinate transform from the map pose to the keyframe pose # publish the nav path from the map pose to the keyframe pose with the cost map + t_nav = time.perf_counter() + + decode_ms = (t_decode - t_start) * 1000.0 + relocal_ms = (t_reloc - t_decode) * 1000.0 + tf_ms = (t_tf - t_reloc) * 1000.0 + nav_ms = (t_nav - t_tf) * 1000.0 + total_ms = (t_nav - t_start) * 1000.0 + msg = ( + f"Localization loop timing ms: total={total_ms:.1f}, decode={decode_ms:.1f}, " + f"relocal={relocal_ms:.1f}, tf_update={tf_ms:.1f}, nav_path={nav_ms:.1f}, success={success}" + ) + if total_ms > 500.0: + self.get_logger().warning(msg) + else: + self.get_logger().info(msg) def keyframe_mapping_with_timer(self, keyframe_image_msg:Image, keyframe_odom_msg:Odometry, depth_msg:Image): with Timer(name="Mapping Loop", text="\n\n[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.timer_logger): @@ -354,12 +416,13 @@ def keyframe_mapping(self, keyframe_image_msg:Image, keyframe_odom_msg:Odometry, self.nav_temp_db.set_entry(keyframe_image_timestamp, depth = depth, infra1_image = image, rgb_image = rgb_image_place_holder) embedding = self.get_embeddings(image) self.nav_temp_db.set_entry(keyframe_image_timestamp, embedding = embedding) - features = asyncio.run(self.super_point_extractor.infer(image)) + features = asyncio.run(self.extractor.infer(image)) self.nav_temp_db.set_entry(keyframe_image_timestamp, features = features) if len(self.odom) == 0 and self.last_keyframe_timestamp is None: self.odom[keyframe_odom_timestamp] = odom self.pose_graph_used_pose[keyframe_odom_timestamp] = odom + self.nav_loop_closure.add_timestamp(keyframe_odom_timestamp) else: last_keyframe_odom_pose = self.odom[self.last_keyframe_timestamp] T_prev_curr = se3_inv(last_keyframe_odom_pose) @ odom @@ -367,27 +430,42 @@ def keyframe_mapping(self, keyframe_image_msg:Image, keyframe_odom_msg:Odometry, self.pose_graph_used_pose[keyframe_image_timestamp] = odom self.odom[keyframe_image_timestamp] = odom def find_loop_and_pose_graph(timestamp): - target_embedding = self.nav_temp_db.get_embedding(timestamp) valid_timestamp = [t for t in self.pose_graph_used_pose.keys() if t + 10 * 1e9 < timestamp] - valid_embeddings = np.array([self.nav_temp_db.get_embedding(t) for t in valid_timestamp]) - - idx_to_timestamp = {i:t for i, t in enumerate(valid_timestamp)} + if len(valid_timestamp) == 0: + return + target_embedding = self.nav_temp_db.get_embedding(timestamp) + _, _, curr_features, _, _ = self.nav_temp_db.get_depth_embedding_features_images(timestamp) + curr_kp = curr_features["kpts"][0] if curr_features["kpts"].ndim == 3 else curr_features["kpts"] + curr_desc = curr_features["descps"][0] if curr_features["descps"].ndim == 3 else curr_features["descps"] with Timer(name = "find loop", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.timer_logger): - loop_list = find_loop(target_embedding, valid_embeddings, self.loop_similarity_threshold, self.loop_top_k) + loop_list = self.nav_loop_closure.find_candidate_timestamps( + curr_kp, + curr_desc, + target_embedding, + top_k=self.loop_top_k, + allowed_timestamps=set(valid_timestamp), + ) with Timer(name = "Relative pose estimation", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.timer_logger): - for idx, similarity in loop_list: - prev_timestamp = idx_to_timestamp[idx] + for candidate in loop_list: + prev_timestamp = candidate["timestamp"] curr_timestamp = timestamp + similarity = float(candidate["similarity"]) + self.logger.info( + f"Loop candidate curr={curr_timestamp} prev={prev_timestamp} similarity={float(similarity):.4f}" + ) prev_depth, _, prev_features, _, _ = self.nav_temp_db.get_depth_embedding_features_images(prev_timestamp) curr_depth, _, curr_features, _, _ = self.nav_temp_db.get_depth_embedding_features_images(curr_timestamp) prev_matched_keypoints, curr_matched_keypoints, matches = self.match_keypoints(prev_features, curr_features) success, T_prev_curr, _, _, inliers = estimate_pose(prev_matched_keypoints, curr_matched_keypoints, curr_depth, self.K) if success and len(inliers) >= 100: self.relative_pose_constraint.append((curr_timestamp, prev_timestamp, T_prev_curr)) - #print(f"Added loop relative pose constraint: {curr_timestamp} -> {prev_timestamp}") + self.logger.info( + f"Loop accepted curr={curr_timestamp} prev={prev_timestamp} similarity={float(similarity):.4f} inliers={len(inliers)}" + ) with Timer(name = "solve pose graph", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.timer_logger): self.pose_graph_used_pose = solve_pose_graph(self.pose_graph_used_pose, self.relative_pose_constraint, max_iteration_num = 5) find_loop_and_pose_graph(keyframe_image_timestamp) + self.nav_loop_closure.add_timestamp(keyframe_image_timestamp) self.pose_graph_trajectory_publish(keyframe_image_timestamp) self.last_keyframe_timestamp = keyframe_odom_timestamp self.last_keyframe_image = image @@ -395,11 +473,19 @@ def find_loop_and_pose_graph(timestamp): def get_embeddings(self, image: np.ndarray) -> np.ndarray: # shape: (1, 768) - return asyncio.run(self.dinov2_model.infer(image)) + return asyncio.run(self.embedding_extractor.infer(image)) def match_keypoints(self, feats0:dict, feats1:dict, image_shape = np.array([848, 480], dtype = np.int64)) -> tuple[np.ndarray, np.ndarray, np.ndarray]: - match_result = asyncio.run(self.light_glue_matcher.infer(feats0["kpts"], feats1["kpts"], feats0['descps'], feats1['descps'], feats0['mask'], feats1['mask'], image_shape, image_shape)) + match_result = asyncio.run(self.matcher.infer(feats0["kpts"], feats1["kpts"], feats0['descps'], feats1['descps'], feats0['mask'], feats1['mask'], image_shape, image_shape)) match_indices = match_result["match_indices"][0] + if feats0["kpts"].ndim != 3 or feats1["kpts"].ndim != 3: + return np.zeros((0, 2), dtype=np.float32), np.zeros((0, 2), dtype=np.float32), np.zeros((0, 2), dtype=np.int64) + if feats0["kpts"].shape[0] == 0 or feats1["kpts"].shape[0] == 0: + return np.zeros((0, 2), dtype=np.float32), np.zeros((0, 2), dtype=np.float32), np.zeros((0, 2), dtype=np.int64) + # Guard against invalid indices returned by matcher. + match_indices = match_indices.copy() + invalid = (match_indices < 0) | (match_indices >= feats1["kpts"][0].shape[0]) | (np.arange(match_indices.shape[0]) >= feats0["kpts"][0].shape[0]) + match_indices[invalid] = -1 valid_mask = match_indices != -1 keypoints0 = feats0["kpts"][0][valid_mask] keypoints1 = feats1["kpts"][0][match_indices[valid_mask]] @@ -433,15 +519,24 @@ def relocalize_with_depth(self, keyframe: np.ndarray, keyframe_features: dict, K if K is None: return False, np.eye(4), -np.inf query_embedding = self.get_embeddings(keyframe) - query_embedding_normed = query_embedding / np.linalg.norm(query_embedding) - - idx_and_similarity_array = find_loop(query_embedding_normed, self.map_embeddings, self.relocalization_threshold, self.relocalization_loop_top_k) - max_similarity = np.max([similarity for _, similarity in idx_and_similarity_array]) if len(idx_and_similarity_array) > 0 else 0 - if len(idx_and_similarity_array) > 0: + query_embedding_norm = np.linalg.norm(query_embedding) + if query_embedding_norm > 0: + query_embedding = query_embedding / query_embedding_norm + + query_kp = keyframe_features["kpts"][0] if keyframe_features["kpts"].ndim == 3 else keyframe_features["kpts"] + query_desc = keyframe_features["descps"][0] if keyframe_features["descps"].ndim == 3 else keyframe_features["descps"] + candidates = self.map_loop_closure.find_candidate_timestamps( + query_kp, + query_desc, + query_embedding, + top_k=self.relocalization_loop_top_k, + ) + max_similarity = max([c["similarity"] for c in candidates]) if len(candidates) > 0 else 0 + if len(candidates) > 0: point_3d_in_world_list = [] point_2d_in_keyframe_list = [] - for idx_in_map, similarity in idx_and_similarity_array: - timestamp_in_map = self.map_embeddings_idx_to_timestamp[idx_in_map] + for candidate in candidates: + timestamp_in_map = int(candidate["timestamp"]) reference_keyframe_pose = self.map_poses[timestamp_in_map] reference_depth, _, reference_features, _, _ = self.db.get_depth_embedding_features_images(timestamp_in_map) reference_matched_keypoints, keyframe_matched_keypoints, matches = self.match_keypoints(reference_features, keyframe_features) @@ -468,7 +563,7 @@ def relocalize_with_depth(self, keyframe: np.ndarray, keyframe_features: dict, K print(f"not enough landmarks to relocalize, {len(point_3d_in_world_list)}") return False, np.eye(4), -np.inf else: - print(f"not enough similar embeddings to relocalize, {len(idx_and_similarity_array)}, max_similarity : {max_similarity}") + print(f"not enough similar embeddings to relocalize, {len(candidates)}, max_similarity : {max_similarity}") return False, np.eye(4), -np.inf def keypoint_with_depth_to_3d(self, keypoints:np.ndarray, depth:np.ndarray, pose_from_camera_to_world:np.ndarray, K:np.ndarray): @@ -502,7 +597,7 @@ def keypoint_with_depth_to_3d(self, keypoints:np.ndarray, depth:np.ndarray, pose @Timer(name="Relocalization loop", text="\n\n[{name}] Elapsed time: {milliseconds:.0f} ms") def keyframe_relocalization(self, timestamp, image:np.ndarray) -> tuple[bool, np.ndarray]: - features = asyncio.run(self.super_point_extractor.infer(image)) + features = asyncio.run(self.extractor.infer(image)) res, pose_in_camera, pose_cov_weight = self.relocalize_with_depth(image, features, self.K) if res: # publish the relocalization pose for debug @@ -696,6 +791,7 @@ def generate_nav_path_in_map(self, pose_in_map: np.ndarray, target_poi: np.ndarr or poi_goal_idx[2] < 0 or poi_goal_idx[2] >= self.occupancy_map.shape[2] ): + print("here") return None sdf_start_path = search_close_to_sdf_map(start_idx, self.sdf_map, self.occupancy_map, 0.2) sdf_goal_path = search_close_to_sdf_map(poi_goal_idx, self.sdf_map, self.occupancy_map, 0.2) @@ -725,10 +821,42 @@ def main(args=None): parser.add_argument("--tinynav_map_path", type=str, required=True) parser.add_argument("--verbose_timer", action="store_true", default=True, help="Enable verbose timer output") parser.add_argument("--no_verbose_timer", dest="verbose_timer", action="store_false", help="Disable verbose timer output") + parser.add_argument("--loop-closure-mode", type=str, default="embedding", choices=["embedding", "bow"]) + parser.add_argument("--loop-closure-use-bow", action="store_true", help="Use ORB+BF and DBoW3 for loop closure") + parser.add_argument( + "--dbow3-vocabulary-path", + type=str, + default="/tinynav/docs/Vocabulary/ORBvoc.txt", + help="DBoW3 vocabulary path for bow mode", + ) parsed_args, unknown_args = parser.parse_known_args(sys.argv[1:]) - node = MapNode(tinynav_db_path=parsed_args.tinynav_db_path, - tinynav_map_path=parsed_args.tinynav_map_path, - verbose_timer=parsed_args.verbose_timer) + + use_bow = parsed_args.loop_closure_use_bow or parsed_args.loop_closure_mode == "bow" + if use_bow: + parsed_args.loop_closure_mode = "bow" + if not os.path.exists(parsed_args.dbow3_vocabulary_path): + raise FileNotFoundError( + f"DBoW3 vocabulary file not found: {parsed_args.dbow3_vocabulary_path}" + ) + extractor = ORBFeatureTRTCompatible() + matcher = ORBMatcher() + embedding_extractor = DummyEmbeddingEngine() + else: + extractor = SuperPointTRT() + matcher = LightGlueTRT() + embedding_extractor = Dinov2TRT() + + node = MapNode( + tinynav_db_path=parsed_args.tinynav_db_path, + tinynav_map_path=parsed_args.tinynav_map_path, + extractor=extractor, + matcher=matcher, + embedding_extractor=embedding_extractor, + loop_closure_mode=parsed_args.loop_closure_mode, + loop_closure_use_bow=use_bow, + dbow3_vocabulary_path=parsed_args.dbow3_vocabulary_path, + verbose_timer=parsed_args.verbose_timer, + ) rclpy.spin(node) node.destroy_node() diff --git a/tinynav/core/models_trt.py b/tinynav/core/models_trt.py index 4d00c3d9..1d1743aa 100644 --- a/tinynav/core/models_trt.py +++ b/tinynav/core/models_trt.py @@ -1,4 +1,7 @@ -import tensorrt as trt +try: + import tensorrt as trt +except ImportError: + trt = None import numpy as np import cv2 from codetiming import Timer @@ -6,10 +9,16 @@ import asyncio from tinynav.core.func import alru_cache_numpy -from cuda import cudart +try: + from cuda import cudart +except ImportError: + cudart = None import ctypes import einops import logging +from typing import Any + +logger = logging.getLogger(__name__) numpy_to_ctypes = { np.dtype(np.float32): ctypes.c_float, @@ -39,6 +48,11 @@ def disparity_to_depth(disparity: np.ndarray, baseline: float, focal_length: flo class TRTBase: def __init__(self, engine_path): + if trt is None or cudart is None: + raise ImportError( + "tensorrt and cuda-python are required for TRT models. " + "Use BoW mode or install TensorRT Python bindings." + ) TRT_LOGGER = trt.Logger(trt.Logger.WARNING) with open(engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: self.engine = runtime.deserialize_cuda_engine(f.read()) @@ -179,6 +193,230 @@ async def infer(self, input_image:np.ndarray, threshold = np.array([[0.0005]], d results["mask"] = results["mask"][:, :, None] return results + +class ORBFeatureTRTCompatible: + """ + ORB feature extractor with a SuperPoint-compatible output interface. + + Returns a dict with: + - kpts: (1, N, 2) float32 + - descps: (1, N, 32) float32 + - mask: (1, N, 1) float32 + """ + + def __init__(self, nfeatures: int = 1024): + self.nfeatures = int(nfeatures) + self.orb = cv2.ORB_create(nfeatures=self.nfeatures) + + async def infer(self, input_image: np.ndarray): + if input_image is None: + raise ValueError("input_image is None") + if input_image.ndim == 3: + gray = cv2.cvtColor(input_image, cv2.COLOR_BGR2GRAY) + elif input_image.ndim == 2: + gray = input_image + else: + raise ValueError(f"Unsupported input_image ndim={input_image.ndim}") + + keypoints, descriptors = self.orb.detectAndCompute(gray, None) + if keypoints is None or len(keypoints) == 0: + return { + "kpts": np.zeros((1, 0, 2), dtype=np.float32), + "descps": np.zeros((1, 0, 32), dtype=np.float32), + "mask": np.zeros((1, 0, 1), dtype=np.float32), + } + + kpts = np.array([[kp.pt[0], kp.pt[1]] for kp in keypoints], dtype=np.float32) + if descriptors is None: + descps = np.zeros((kpts.shape[0], 32), dtype=np.float32) + else: + descps = descriptors.astype(np.float32) / 255.0 + + return { + "kpts": kpts[None, :, :], + "descps": descps[None, :, :], + "mask": np.ones((1, kpts.shape[0], 1), dtype=np.float32), + } + + +class ORBMatcher: + """ + ORB descriptor matcher with BFMatcher(crossCheck=True) and geometric RANSAC filtering. + + Output is LightGlue-compatible: + - match_indices: (1, N0) int32, each value is matched index in keypoints1 or -1. + """ + + def __init__(self, ransac_reproj_threshold: float = 1.0): + self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) + self.ransac_reproj_threshold = float(ransac_reproj_threshold) + + async def infer( + self, + kpts0: np.ndarray, + kpts1: np.ndarray, + desc0: np.ndarray, + desc1: np.ndarray, + mask0: np.ndarray, + mask1: np.ndarray, + img_shape0=None, + img_shape1=None, + match_threshold=None, + ): + k0 = np.asarray(kpts0[0], dtype=np.float32) + k1 = np.asarray(kpts1[0], dtype=np.float32) + if k0.ndim != 2 or k1.ndim != 2 or k0.shape[1] < 2 or k1.shape[1] < 2: + n0 = int(k0.shape[0]) if k0.ndim >= 1 else 0 + return {"match_indices": np.full((1, n0), -1, dtype=np.int32)} + # keep only xy coordinates + k0 = k0[:, :2] + k1 = k1[:, :2] + n0 = int(k0.shape[0]) + out = {"match_indices": np.full((1, n0), -1, dtype=np.int32)} + if n0 == 0 or k1.shape[0] == 0: + return out + + d0 = np.asarray(desc0[0], dtype=np.float32) + d1 = np.asarray(desc1[0], dtype=np.float32) + if d0.ndim != 2 or d1.ndim != 2 or d0.shape[0] == 0 or d1.shape[0] == 0: + return out + # Ensure descriptor rows and keypoints rows are aligned. + n0_valid = min(k0.shape[0], d0.shape[0]) + n1_valid = min(k1.shape[0], d1.shape[0]) + if n0_valid == 0 or n1_valid == 0: + return out + k0 = k0[:n0_valid] + k1 = k1[:n1_valid] + d0 = d0[:n0_valid] + d1 = d1[:n1_valid] + out = {"match_indices": np.full((1, n0_valid), -1, dtype=np.int32)} + + m0 = np.asarray(mask0[0, :, 0] > 0, dtype=bool) if mask0.size else np.ones((d0.shape[0],), dtype=bool) + m1 = np.asarray(mask1[0, :, 0] > 0, dtype=bool) if mask1.size else np.ones((d1.shape[0],), dtype=bool) + if m0.shape[0] != d0.shape[0]: + m0 = np.ones((d0.shape[0],), dtype=bool) + if m1.shape[0] != d1.shape[0]: + m1 = np.ones((d1.shape[0],), dtype=bool) + + idx0 = np.where(m0)[0] + idx1 = np.where(m1)[0] + if idx0.size == 0 or idx1.size == 0: + return out + + # ORB descriptor is binary; convert normalized float [0,1] back to uint8 [0,255]. + d0_u8 = np.clip(np.rint(d0[idx0] * 255.0), 0, 255).astype(np.uint8) + d1_u8 = np.clip(np.rint(d1[idx1] * 255.0), 0, 255).astype(np.uint8) + if d0_u8.size == 0 or d1_u8.size == 0: + return out + + raw_matches = self.bf.match(d0_u8, d1_u8) + if len(raw_matches) == 0: + return out + + raw_matches = sorted(raw_matches, key=lambda m: m.distance) + pts0 = np.array([k0[idx0[m.queryIdx]] for m in raw_matches], dtype=np.float32).reshape(-1, 2) + pts1 = np.array([k1[idx1[m.trainIdx]] for m in raw_matches], dtype=np.float32).reshape(-1, 2) + + inlier_mask = np.ones((len(raw_matches),), dtype=bool) + if len(raw_matches) >= 8 and np.isfinite(pts0).all() and np.isfinite(pts1).all(): + logger.info( + "ORBMatcher RANSAC input: pts0.shape=%s pts1.shape=%s raw_matches=%d", + pts0.shape, + pts1.shape, + len(raw_matches), + ) + _, ransac_inliers = cv2.findFundamentalMat( + pts0, + pts1, + cv2.FM_RANSAC, + self.ransac_reproj_threshold, + 0.99, + ) + if ransac_inliers is not None and ransac_inliers.size == len(raw_matches): + inlier_mask = ransac_inliers.reshape(-1).astype(bool) + + for i, m in enumerate(raw_matches): + if not inlier_mask[i]: + continue + gi = int(idx0[m.queryIdx]) + gj = int(idx1[m.trainIdx]) + out["match_indices"][0, gi] = gj + return out + + +class DBoW3Engine: + """ + Thin Python wrapper for PyDBoW3 database operations. + + Supports adding/querying either: + - ORBFeatureTRTCompatible outputs (dict with "descps") + - Raw descriptor arrays shaped (N, 32), dtype uint8/float32 + """ + + def __init__(self, vocabulary_path: str): + self._bow = self._import_module() + self.voc = self._bow.Vocabulary() + load_ret = self.voc.load(vocabulary_path) + # PyDBoW3 often returns None on success (instead of True). + if load_ret is False: + raise RuntimeError(f"Failed to load DBoW3 vocabulary: {vocabulary_path}") + self.db = self._bow.Database() + self.db.setVocabulary(self.voc) + + @staticmethod + def _import_module(): + try: + import pydbow3 as bow # type: ignore + return bow + except Exception: + import pyDBoW3 as bow # type: ignore + return bow + + @staticmethod + def _normalize_desc(features: Any) -> np.ndarray: + if isinstance(features, dict): + if "descps" not in features: + raise ValueError("features dict must contain 'descps'") + desc = np.asarray(features["descps"]) + if desc.ndim == 3: + desc = desc[0] + else: + desc = np.asarray(features) + + if desc.ndim != 2: + raise ValueError(f"Descriptor array must be 2D, got shape {desc.shape}") + if desc.shape[1] != 32: + raise ValueError(f"Expected ORB descriptor width 32, got {desc.shape[1]}") + if desc.size == 0: + return np.zeros((0, 32), dtype=np.uint8) + + if desc.dtype == np.uint8: + return np.ascontiguousarray(desc) + # ORBFeatureTRTCompatible emits float32 in [0,1], convert back to binary-bytes domain. + if np.issubdtype(desc.dtype, np.floating): + return np.ascontiguousarray(np.clip(np.rint(desc * 255.0), 0, 255).astype(np.uint8)) + return np.ascontiguousarray(desc.astype(np.uint8)) + + def add(self, features: Any): + desc = self._normalize_desc(features) + return self.db.add(desc) + + def query(self, features: Any, max_results: int = 10): + desc = self._normalize_desc(features) + try: + results = self.db.query(desc, int(max_results)) + except TypeError: + # Some bindings expose query(desc) only. + results = self.db.query(desc) + return [ + { + "id": int(getattr(r, "Id", getattr(r, "id", -1))), + "score": float(getattr(r, "Score", getattr(r, "score", 0.0))), + } + for r in results + ] + + class LightGlueTRT(TRTBase): def __init__(self, engine_path=f"/tinynav/tinynav/models/lightglue_fp16_{platform.machine()}.plan"): super().__init__(engine_path)