Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions datahub-web-react/src/app/ingest/source/builder/constants.ts
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

v1 frontend was deprecated, so you can skip updating anything in datahub-web-react/src/app/ingest/source/

Just keep the updates in datahub-web-react/src/app/ingestV2/

Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import fivetranLogo from '@images/fivetranlogo.png';
import glueLogo from '@images/gluelogo.png';
import googleSheetsLogo from '@images/google-sheets-logo.png';
import grafanaLogo from '@images/grafana.png';
import hbaseLogo from '@images/hbaselogo.png';
import hiveLogo from '@images/hivelogo.png';
import kafkaLogo from '@images/kafkalogo.png';
import lookerLogo from '@images/lookerlogo.svg';
Expand Down Expand Up @@ -75,6 +76,8 @@ export const GLUE = 'glue';
export const GLUE_URN = `urn:li:dataPlatform:${GLUE}`;
export const GRAFANA = 'grafana';
export const GRAFANA_URN = `urn:li:dataPlatform:${GRAFANA}`;
export const HBASE = 'hbase';
export const HBASE_URN = `urn:li:dataPlatform:${HBASE}`;
export const HIVE = 'hive';
export const HIVE_URN = `urn:li:dataPlatform:${HIVE}`;
export const KAFKA = 'kafka';
Expand Down Expand Up @@ -170,6 +173,7 @@ export const PLATFORM_URN_TO_LOGO = {
[FEAST_URN]: feastLogo,
[GLUE_URN]: glueLogo,
[GRAFANA_URN]: grafanaLogo,
[HBASE_URN]: hbaseLogo,
[HIVE_URN]: hiveLogo,
[KAFKA_URN]: kafkaLogo,
[LOOKER_URN]: lookerLogo,
Expand Down
8 changes: 8 additions & 0 deletions datahub-web-react/src/app/ingest/source/builder/sources.json
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,14 @@
"docsUrl": "https://docs.datahub.com/docs/generated/ingestion/sources/hive/",
"recipe": "source: \n type: hive\n config:\n # Coordinates\n host_port: # Your Hive host and port, e.g. hive:10000\n database: # Your Hive database name, e.g. SampleDatabase (Optional, if not specified, ingests from all databases)\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: null # Your Hive username, e.g. admin\n stateful_ingestion:\n enabled: true"
},
{
"urn": "urn:li:dataPlatform:hbase",
"name": "hbase",
"displayName": "HBase",
"description": "Import Namespaces, Tables, Column Families, and metadata from Apache HBase.",
"docsUrl": "https://docs.datahub.com/docs/generated/ingestion/sources/hbase/",
"recipe": "source: \n type: hbase\n config:\n # Coordinates\n host: localhost # Your HBase Thrift server hostname\n port: 9090 # Your HBase Thrift server port\n\n # Optional configurations\n use_ssl: false\n auth_mechanism: null # Options: null, KERBEROS, or custom\n\n # Schema extraction\n include_column_families: true\n max_column_qualifiers: 100\n\n stateful_ingestion:\n enabled: true"
},
{
"urn": "urn:li:dataPlatform:presto",
"name": "presto",
Expand Down
42 changes: 42 additions & 0 deletions datahub-web-react/src/app/ingest/source/conf/hbase/hbase.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import { SourceConfig } from '@app/ingest/source/conf/types';

import hbaseLogo from '@images/hbaselogo.png';

/**
 * Placeholder YAML recipe for the HBase ingestion source.
 *
 * YAML expresses nesting through indentation only, so the indentation inside this
 * template literal is significant: `type` and `config` are children of `source`, and
 * every connection, filter, schema and stateful-ingestion option is a child of `config`.
 * The leading backslash suppresses the newline right after the opening backtick so the
 * text starts directly with `source:`.
 */
const placeholderRecipe = `\
source:
    type: hbase
    config:
        # Coordinates
        host: # Your HBase Thrift server host, e.g. localhost
        port: 9090 # Your HBase Thrift server port (default: 9090)

        # Optional: Filter patterns
        namespace_pattern:
            allow:
                - ".*" # Allow all namespaces
        table_pattern:
            allow:
                - ".*" # Allow all tables

        # Optional: Authentication
        # auth_mechanism: # Authentication mechanism (e.g., KERBEROS)

        # Optional: Schema extraction
        include_column_families: true # Include column families in schema metadata
        max_column_qualifiers: 100 # Maximum column qualifiers to sample

        stateful_ingestion:
            enabled: true
`;

/** Source type identifier used for the HBase ingestion source. */
export const HBASE = 'hbase';

/**
 * Template configuration for the HBase source: its type identifier, display name,
 * logo, documentation link and the placeholder recipe defined in this module.
 */
const hbaseConfig: SourceConfig = {
    displayName: 'HBase',
    type: HBASE,
    logoUrl: hbaseLogo,
    docsUrl: 'https://docs.datahub.com/docs/generated/ingestion/sources/hbase/',
    placeholderRecipe: placeholderRecipe,
};

export default hbaseConfig;
2 changes: 2 additions & 0 deletions datahub-web-react/src/app/ingest/source/conf/sources.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import azureAdConfig from '@app/ingest/source/conf/azure/azure';
import bigqueryConfig from '@app/ingest/source/conf/bigquery/bigquery';
import csvConfig from '@app/ingest/source/conf/csv/csv';
import glueConfig from '@app/ingest/source/conf/glue/glue';
import hbaseConfig from '@app/ingest/source/conf/hbase/hbase';
import hiveConfig from '@app/ingest/source/conf/hive/hive';
import kafkaConfig from '@app/ingest/source/conf/kafka/kafka';
import lookerConfig from '@app/ingest/source/conf/looker/looker';
Expand Down Expand Up @@ -49,6 +50,7 @@ export const SOURCE_TEMPLATE_CONFIGS: Array<SourceConfig> = [
oktaConfig,
glueConfig,
oracleConfig,
hbaseConfig,
hiveConfig,
csvConfig,
sacConfig,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,16 @@ import {
TARGET_PLATFORM,
TARGET_PLATFORM_INSTANCE,
} from '@app/ingestV2/source/builder/RecipeForm/dbt_cloud';
import {
HBASE_AUTH_MECHANISM,
HBASE_HOST,
HBASE_INCLUDE_COLUMN_FAMILIES,
HBASE_MAX_COLUMN_QUALIFIERS,
HBASE_PORT,
HBASE_USE_SSL,
NAMESPACE_ALLOW,
NAMESPACE_DENY,
} from '@app/ingestV2/source/builder/RecipeForm/hbase';
import {
HIVE_DATABASE,
HIVE_HOST_PORT,
Expand Down Expand Up @@ -235,6 +245,7 @@ import {
CSV,
DATABRICKS,
DBT_CLOUD,
HBASE,
MYSQL,
OKTA,
POWER_BI,
Expand Down Expand Up @@ -423,6 +434,16 @@ export const RECIPE_FIELDS: RecipeFields = {
advancedFields: [INCLUDE_TABLES, TABLE_PROFILING_ENABLED, COLUMN_PROFILING_ENABLED, STATEFUL_INGESTION_ENABLED],
filterSectionTooltip: 'Include or exclude specific Schemas, Tables and Views from ingestion.',
},
[HBASE]: {
fields: [HBASE_HOST, HBASE_PORT, HBASE_USE_SSL, HBASE_AUTH_MECHANISM],
filterFields: [NAMESPACE_ALLOW, NAMESPACE_DENY, TABLE_ALLOW, TABLE_DENY],
advancedFields: [
HBASE_INCLUDE_COLUMN_FAMILIES,
HBASE_MAX_COLUMN_QUALIFIERS,
STATEFUL_INGESTION_ENABLED,
],
filterSectionTooltip: 'Include or exclude specific Namespaces and Tables from ingestion.',
},
[PRESTO]: {
fields: [PRESTO_HOST_PORT, PRESTO_USERNAME, PRESTO_PASSWORD, PRESTO_DATABASE],
filterFields: [SCHEMA_ALLOW, SCHEMA_DENY, TABLE_ALLOW, TABLE_DENY, VIEW_ALLOW, VIEW_DENY],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import { RecipeField, FieldType, setListValuesOnRecipe } from './common';

/**
 * Required text field for the HBase Thrift server host.
 * Its value is bound to `source.config.host` in the recipe.
 */
export const HBASE_HOST: RecipeField = {
    name: 'host',
    fieldPath: 'source.config.host',
    type: FieldType.TEXT,
    label: 'Host',
    placeholder: 'localhost',
    tooltip: 'HBase Thrift server hostname or IP address',
    required: true,
    rules: null,
};

/**
 * Optional text field for the HBase Thrift server port.
 * Its value is bound to `source.config.port` in the recipe.
 */
export const HBASE_PORT: RecipeField = {
    name: 'port',
    fieldPath: 'source.config.port',
    type: FieldType.TEXT,
    label: 'Port',
    placeholder: '9090',
    tooltip: 'HBase Thrift server port (default: 9090 for Thrift1)',
    required: false,
    rules: null,
};

/**
 * Optional boolean field toggling SSL/TLS for the connection.
 * Its value is bound to `source.config.use_ssl` in the recipe.
 */
export const HBASE_USE_SSL: RecipeField = {
    name: 'use_ssl',
    fieldPath: 'source.config.use_ssl',
    type: FieldType.BOOLEAN,
    label: 'Use SSL',
    tooltip: 'Whether to use SSL/TLS for connection',
    required: false,
    rules: null,
};

/**
 * Optional free-text field naming the authentication mechanism.
 * Its value is bound to `source.config.auth_mechanism` in the recipe.
 */
export const HBASE_AUTH_MECHANISM: RecipeField = {
    name: 'auth_mechanism',
    fieldPath: 'source.config.auth_mechanism',
    type: FieldType.TEXT,
    label: 'Authentication Mechanism',
    placeholder: 'KERBEROS',
    tooltip: 'Authentication mechanism (None, KERBEROS, or custom)',
    required: false,
    rules: null,
};

// Recipe path shared by the field definition and its value-setter override below.
const namespaceAllowFieldPath = 'source.config.namespace_pattern.allow';

/**
 * List field, shown in the "Namespaces" section, holding regex allow patterns for namespaces.
 * Writing values is delegated to `setListValuesOnRecipe` with this field's recipe path.
 */
export const NAMESPACE_ALLOW: RecipeField = {
    name: 'namespace_pattern.allow',
    fieldPath: namespaceAllowFieldPath,
    type: FieldType.LIST,
    section: 'Namespaces',
    label: 'Allow Patterns for Namespace',
    buttonLabel: 'Add pattern',
    placeholder: '.*',
    tooltip:
        'Use regex here. e.g. to allow all namespaces, use ".*" or to allow namespaces starting with "production" use "production.*"',
    rules: null,
    setValueOnRecipeOverride: (recipe: any, values: string[]) =>
        setListValuesOnRecipe(recipe, values, namespaceAllowFieldPath),
};

// Recipe path shared by the field definition and its value-setter override below.
const namespaceDenyFieldPath = 'source.config.namespace_pattern.deny';

/**
 * List field, shown in the "Namespaces" section, holding regex deny patterns for namespaces.
 * Writing values is delegated to `setListValuesOnRecipe` with this field's recipe path.
 */
export const NAMESPACE_DENY: RecipeField = {
    name: 'namespace_pattern.deny',
    fieldPath: namespaceDenyFieldPath,
    type: FieldType.LIST,
    section: 'Namespaces',
    label: 'Deny Patterns for Namespace',
    buttonLabel: 'Add pattern',
    placeholder: 'system.*',
    tooltip:
        'Use regex here. Deny patterns take precedence over allow patterns. e.g. to deny all system namespaces, use "system.*"',
    rules: null,
    setValueOnRecipeOverride: (recipe: any, values: string[]) =>
        setListValuesOnRecipe(recipe, values, namespaceDenyFieldPath),
};

/**
 * Optional boolean field controlling whether column families are included as schema metadata.
 * Its value is bound to `source.config.include_column_families` in the recipe.
 */
export const HBASE_INCLUDE_COLUMN_FAMILIES: RecipeField = {
    name: 'include_column_families',
    fieldPath: 'source.config.include_column_families',
    type: FieldType.BOOLEAN,
    label: 'Include Column Families',
    tooltip: 'Whether to include column families as schema metadata',
    required: false,
    rules: null,
};

/**
 * Optional text field for the per-column-family cap on sampled column qualifiers.
 * Its value is bound to `source.config.max_column_qualifiers` in the recipe.
 */
export const HBASE_MAX_COLUMN_QUALIFIERS: RecipeField = {
    name: 'max_column_qualifiers',
    fieldPath: 'source.config.max_column_qualifiers',
    type: FieldType.TEXT,
    label: 'Max Column Qualifiers',
    placeholder: '100',
    tooltip: 'Maximum number of column qualifiers to sample per column family',
    required: false,
    rules: null,
};
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import elasticsearchLogo from '@images/elasticsearchlogo.png';
import feastLogo from '@images/feastlogo.png';
import fivetranLogo from '@images/fivetranlogo.png';
import glueLogo from '@images/gluelogo.png';
import hbaseLogo from '@images/hbaselogo.png';
import hiveLogo from '@images/hivelogo.png';
import kafkaLogo from '@images/kafkalogo.png';
import lookerLogo from '@images/lookerlogo.svg';
Expand Down Expand Up @@ -69,6 +70,8 @@ export const FEAST_LEGACY = 'feast-legacy';
export const FEAST_URN = `urn:li:dataPlatform:${FEAST}`;
export const GLUE = 'glue';
export const GLUE_URN = `urn:li:dataPlatform:${GLUE}`;
export const HBASE = 'hbase';
export const HBASE_URN = `urn:li:dataPlatform:${HBASE}`;
export const HIVE = 'hive';
export const HIVE_URN = `urn:li:dataPlatform:${HIVE}`;
export const KAFKA = 'kafka';
Expand Down Expand Up @@ -159,6 +162,7 @@ export const PLATFORM_URN_TO_LOGO = {
[ELASTICSEARCH_URN]: elasticsearchLogo,
[FEAST_URN]: feastLogo,
[GLUE_URN]: glueLogo,
[HBASE_URN]: hbaseLogo,
[HIVE_URN]: hiveLogo,
[KAFKA_URN]: kafkaLogo,
[LOOKER_URN]: lookerLogo,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,14 @@
"docsUrl": "https://docs.datahub.com/docs/generated/ingestion/sources/hive/",
"recipe": "source: \n type: hive\n config:\n # Coordinates\n host_port: # Your Hive host and port, e.g. hive:10000\n database: # Your Hive database name, e.g. SampleDatabase (Optional, if not specified, ingests from all databases)\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: null # Your Hive username, e.g. admin\n stateful_ingestion:\n enabled: true"
},
{
"urn": "urn:li:dataPlatform:hbase",
"name": "hbase",
"displayName": "HBase",
"description": "Import Namespaces, Tables, Column Families, and metadata from Apache HBase.",
"docsUrl": "https://docs.datahub.com/docs/generated/ingestion/sources/hbase/",
"recipe": "source: \n type: hbase\n config:\n # Coordinates\n host: localhost # Your HBase Thrift server hostname\n port: 9090 # Your HBase Thrift server port\n\n # Optional configurations\n use_ssl: false\n auth_mechanism: null # Options: null, KERBEROS, or custom\n\n # Schema extraction\n include_column_families: true\n max_column_qualifiers: 100\n\n stateful_ingestion:\n enabled: true"
},
{
"urn": "urn:li:dataPlatform:presto",
"name": "presto",
Expand Down
42 changes: 42 additions & 0 deletions datahub-web-react/src/app/ingestV2/source/conf/hbase/hbase.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import { SourceConfig } from '@app/ingestV2/source/conf/types';

import hbaseLogo from '@images/hbaselogo.png';

/**
 * Placeholder YAML recipe for the HBase ingestion source.
 *
 * YAML expresses nesting through indentation only, so the indentation inside this
 * template literal is significant: `type` and `config` are children of `source`, and
 * every connection, filter, schema and stateful-ingestion option is a child of `config`.
 * The leading backslash suppresses the newline right after the opening backtick so the
 * text starts directly with `source:`.
 */
const placeholderRecipe = `\
source:
    type: hbase
    config:
        # Coordinates
        host: # Your HBase Thrift server host, e.g. localhost
        port: 9090 # Your HBase Thrift server port (default: 9090)

        # Optional: Filter patterns
        namespace_pattern:
            allow:
                - ".*" # Allow all namespaces
        table_pattern:
            allow:
                - ".*" # Allow all tables

        # Optional: Authentication
        # auth_mechanism: # Authentication mechanism (e.g., KERBEROS)

        # Optional: Schema extraction
        include_column_families: true # Include column families in schema metadata
        max_column_qualifiers: 100 # Maximum column qualifiers to sample

        stateful_ingestion:
            enabled: true
`;

/** Source type identifier used for the HBase ingestion source. */
export const HBASE = 'hbase';

/**
 * Template configuration for the HBase source: its type identifier, display name,
 * logo, documentation link and the placeholder recipe defined in this module.
 */
const hbaseConfig: SourceConfig = {
    displayName: 'HBase',
    type: HBASE,
    logoUrl: hbaseLogo,
    docsUrl: 'https://docs.datahub.com/docs/generated/ingestion/sources/hbase/',
    placeholderRecipe: placeholderRecipe,
};

export default hbaseConfig;
2 changes: 2 additions & 0 deletions datahub-web-react/src/app/ingestV2/source/conf/sources.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import azureAdConfig from '@app/ingestV2/source/conf/azure/azure';
import bigqueryConfig from '@app/ingestV2/source/conf/bigquery/bigquery';
import csvConfig from '@app/ingestV2/source/conf/csv/csv';
import glueConfig from '@app/ingestV2/source/conf/glue/glue';
import hbaseConfig from '@app/ingestV2/source/conf/hbase/hbase';
import hiveConfig from '@app/ingestV2/source/conf/hive/hive';
import kafkaConfig from '@app/ingestV2/source/conf/kafka/kafka';
import lookerConfig from '@app/ingestV2/source/conf/looker/looker';
Expand Down Expand Up @@ -48,6 +49,7 @@ export const SOURCE_TEMPLATE_CONFIGS: Array<SourceConfig> = [
oktaConfig,
glueConfig,
oracleConfig,
hbaseConfig,
hiveConfig,
csvConfig,
sacConfig,
Expand Down
Binary file added datahub-web-react/src/images/hbaselogo.png
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this rectangle shape may not fit well in the UI, have you checked?
a logo that fits better in a square/circle shape would look better

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resized to square

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
57 changes: 57 additions & 0 deletions metadata-ingestion/docs/sources/hbase/hbase_pre.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
### Setup

This integration extracts metadata from Apache HBase via the Thrift API using the `happybase` Python library, including information about:

- Namespaces
- Tables
- Column families and their properties
- Table configurations

You'll need a running HBase Thrift server that is reachable from the ingestion host, and an account with the appropriate permissions.

#### Prerequisites

1. **Install Required Python Packages**:

```bash
pip install 'acryl-datahub[hbase]'
```

This will install the `happybase` package required for connecting to HBase.

2. **HBase Thrift Server**:

- Ensure the HBase Thrift server is running (typically on port 9090).
- Start the Thrift server if not already running:
```bash
hbase thrift start -p 9090
```

3. **Network Access**:

- The host running DataHub ingestion must have network access to the HBase Thrift server.
- Verify connectivity:
```bash
telnet <hbase-host> 9090
```

4. **Permissions**:
- The user/service account must have read access to:
- System tables for metadata extraction
- Target namespaces and tables you want to ingest

#### Authentication

The current implementation supports unauthenticated connections to the HBase Thrift server.

:::note

For production deployments, it's recommended to use secure connections and ensure your HBase Thrift server is properly secured with network-level access controls.

:::

:::info

The connector extracts column family metadata but does not sample individual column qualifiers. This ensures efficient metadata extraction without impacting HBase performance.

:::
Loading
Loading