| 
 | 1 | +import logging  | 
 | 2 | + | 
 | 3 | +from yarl import URL  | 
 | 4 | + | 
 | 5 | +from cratedb_toolkit.io.ingestr.boot import import_ingestr  | 
 | 6 | +from cratedb_toolkit.model import DatabaseAddress  | 
 | 7 | + | 
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Bootstrap the optional `ingestr` dependency. The three results are used below as:
# - `ingestr_available`: truthiness flag checked before any use of `ingestr`,
# - `ingestr`: the namespace providing `src.factory` and `main.ingest`,
# - `ConfigFieldMissingException`: exception type caught in `ingestr_copy`.
# NOTE(review): what the last two hold when ingestr is not installed is decided
# by `import_ingestr()`, which is not visible here -- confirm before using them unguarded.
ingestr_available, ingestr, ConfigFieldMissingException = import_ingestr()
 | 12 | + | 
 | 13 | + | 
 | 14 | +def ingestr_select(source_url: str) -> bool:  | 
 | 15 | +    """  | 
 | 16 | +    Whether to select `ingestr` for this data source.  | 
 | 17 | +    """  | 
 | 18 | +    if not ingestr_available:  | 
 | 19 | +        return False  | 
 | 20 | +    try:  | 
 | 21 | +        factory = ingestr.src.factory.SourceDestinationFactory(source_url, "csv:////tmp/foobar.csv")  | 
 | 22 | +        factory.get_source()  | 
 | 23 | +        scheme = ingestr.src.factory.parse_scheme_from_uri(source_url)  | 
 | 24 | +        logger.info(f"Selecting ingestr for source scheme: {scheme}")  | 
 | 25 | +        return True  | 
 | 26 | +    except (ImportError, ValueError, AttributeError) as ex:  | 
 | 27 | +        if "Unsupported source scheme" in str(ex):  | 
 | 28 | +            logger.debug(f"Failed to select ingestr for source url '{source_url}': {ex}")  | 
 | 29 | +        else:  | 
 | 30 | +            logger.exception(f"Unexpected error with ingestr for source url: {source_url}")  | 
 | 31 | +        return False  | 
 | 32 | +    except Exception:  | 
 | 33 | +        logger.exception(f"Unexpected error with ingestr for source url: {source_url}")  | 
 | 34 | +        return False  | 
 | 35 | + | 
 | 36 | + | 
 | 37 | +def ingestr_copy(source_url: str, target_address: DatabaseAddress, progress: bool = False):  | 
 | 38 | +    """  | 
 | 39 | +    Invoke data transfer to CrateDB from any source provided by `ingestr`.  | 
 | 40 | +
  | 
 | 41 | +    https://cratedb-toolkit.readthedocs.io/io/ingestr/  | 
 | 42 | +
  | 
 | 43 | +    Synopsis:  | 
 | 44 | +
  | 
 | 45 | +        ctk load table \  | 
 | 46 | +            "frankfurter://?base=EUR&table=latest" \  | 
 | 47 | +            --cluster-url="crate://crate:na@localhost:4200/testdrive/exchange_latest"  | 
 | 48 | +
  | 
 | 49 | +        ctk load table \  | 
 | 50 | +            "frankfurter://?base=EUR&table=currencies" \  | 
 | 51 | +            --cluster-url="crate://crate:na@localhost:4200/testdrive/exchange_currencies"  | 
 | 52 | +
  | 
 | 53 | +        ctk load table \  | 
 | 54 | +            "postgresql://pguser:[email protected]:5432/postgres?table=public.diamonds" \  | 
 | 55 | +            --cluster-url="crate://crate:na@localhost:4200/testdrive/ibis_diamonds"  | 
 | 56 | +    """  | 
 | 57 | + | 
 | 58 | +    # Sanity checks.  | 
 | 59 | +    if not ingestr_available:  | 
 | 60 | +        raise ModuleNotFoundError("ingestr subsystem not installed")  | 
 | 61 | + | 
 | 62 | +    # Compute source and target URLs and table names.  | 
 | 63 | +    # Table names use dotted notation `<schema>.<table>`.  | 
 | 64 | + | 
 | 65 | +    source_url_obj = URL(source_url)  | 
 | 66 | +    source_table = source_url_obj.query.get("table")  | 
 | 67 | +    source_fragment = source_url_obj.fragment  | 
 | 68 | +    source_url_obj = source_url_obj.without_query_params("table").with_fragment("")  | 
 | 69 | + | 
 | 70 | +    target_uri, target_table_address = target_address.decode()  | 
 | 71 | +    target_table = target_table_address.fullname  | 
 | 72 | +    target_url = target_address.to_ingestr_url()  | 
 | 73 | + | 
 | 74 | +    if not source_table:  | 
 | 75 | +        raise ValueError("Source table is required")  | 
 | 76 | +    if not target_table:  | 
 | 77 | +        target_table = source_table  | 
 | 78 | + | 
 | 79 | +    if source_fragment:  | 
 | 80 | +        source_table += f"#{source_fragment}"  | 
 | 81 | + | 
 | 82 | +    logger.info("Invoking ingestr")  | 
 | 83 | +    logger.info(f"Source URL: {source_url_obj}")  | 
 | 84 | +    logger.info(f"Target URL: {target_url}")  | 
 | 85 | +    logger.info(f"Source Table: {source_table}")  | 
 | 86 | +    logger.info(f"Target Table: {target_table}")  | 
 | 87 | + | 
 | 88 | +    try:  | 
 | 89 | +        ingestr.main.ingest(  | 
 | 90 | +            source_uri=str(source_url_obj),  | 
 | 91 | +            dest_uri=str(target_url),  | 
 | 92 | +            source_table=source_table,  | 
 | 93 | +            dest_table=target_table,  | 
 | 94 | +            yes=True,  | 
 | 95 | +        )  | 
 | 96 | +        return True  | 
 | 97 | +    except ConfigFieldMissingException:  | 
 | 98 | +        logger.error(  | 
 | 99 | +            "A configuration field is missing. Please ensure all required credentials are provided. "  | 
 | 100 | +            "For example, if your account does not use a password, use a dummy password `na` like "  | 
 | 101 | +            "`export CRATEDB_CLUSTER_URL=crate://crate:na@localhost:4200/testdrive`"  | 
 | 102 | +        )  | 
 | 103 | +        raise  | 
 | 104 | +    except Exception as ex:  | 
 | 105 | +        logger.exception(f"Failed to ingest data: {ex}")  | 
 | 106 | +        return False  | 
0 commit comments