Skip to content

Commit 64f583a

Browse files
authored
Add datafusion-json-functions as optional extension (#143)
Closes #130 Changes: 1. Integrates the functions https://github.com/datafusion-contrib/datafusion-functions-json 2. Adds some basic testing for these extensions (I need to figure out how to do something similar for s3 and delta)
1 parent b419d8c commit 64f583a

File tree

10 files changed

+464
-54
lines changed

10 files changed

+464
-54
lines changed

Cargo.lock

Lines changed: 157 additions & 32 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ color-eyre = "0.6.3"
2121
crossterm = { version = "0.28.1", features = ["event-stream"] }
2222
datafusion = "41.0.0"
2323
datafusion-common = "41.0.0"
24+
datafusion-functions-json = { version = "0.41.0", optional = true }
2425
deltalake = { version = "0.19.0", features = ["datafusion"], optional = true }
2526
directories = "5.0.1"
2627
env_logger = "0.11.5"
@@ -43,13 +44,15 @@ url = { version = "2.5.2", optional = true }
4344

4445
[dev-dependencies]
4546
assert_cmd = "2.0.16"
47+
insta = { version = "1.40.0", features = ["yaml"] }
4648
predicates = "3.1.2"
4749
tempfile = "3.2.0"
4850

4951
[features]
5052
deltalake = ["dep:deltalake"]
5153
flightsql = ["dep:arrow-flight", "dep:tonic"]
5254
s3 = ["object_store/aws", "url"]
55+
functions-json = ["dep:datafusion-functions-json"]
5356
url = ["dep:url"]
5457

5558
[[bin]]

README.md

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,18 @@ Currently, the only supported packaging is on [crates.io](https://crates.io/sear
9494

9595
Once installed you can run `dft` to start the application.
9696

97-
#### Features
97+
#### Optional Features (Rust Crate Features)
98+
99+
`dft` has several optional (conditionally compiled) integrations, which are controlled by [Rust Crate Features].
100+
101+
To build with all features, you can run
102+
103+
```shell
104+
cargo install --path . --all-features
105+
```
106+
107+
[Rust Crate Features]: https://doc.rust-lang.org/cargo/reference/features.html
108+
98109

99110
#### S3 (`--features=s3`)
100111

@@ -143,6 +154,18 @@ Register deltalake tables. For example:
143154
CREATE EXTERNAL TABLE table_name STORED AS DELTATABLE LOCATION 's3://bucket/table'
144155
```
145156

157+
#### JSON Functions (`--features=functions-json`)
158+
159+
Adds functions from [datafusion-functions-json] for querying JSON strings with DataFusion in `dft`. For example:
160+
161+
```sql
162+
select * from foo where json_get(attributes, 'bar')::string='ham'
163+
select id, json_col->'a' as json_col_a from test_table -- JSON operators such as -> work too
164+
```
165+
166+
[datafusion-functions-json]: https://github.com/datafusion-contrib/datafusion-functions-json
167+
168+
146169
### Config
147170
148171
The `dft` configuration is stored in `~/.config/dft/config.toml`

src/app/state/mod.rs

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ pub mod tabs;
1919

2020
use crate::app::state::tabs::sql::SQLTabState;
2121
use crate::app::ui::SelectedTab;
22-
use crate::config::get_data_dir;
2322
use log::{debug, error, info};
2423
use std::path::PathBuf;
2524

@@ -46,7 +45,6 @@ impl Default for Tabs {
4645
pub struct AppState<'app> {
4746
pub config: AppConfig,
4847
pub should_quit: bool,
49-
pub data_dir: PathBuf,
5048
pub sql_tab: SQLTabState<'app>,
5149
#[cfg(feature = "flightsql")]
5250
pub flightsql_tab: FlightSQLTabState<'app>,
@@ -57,7 +55,6 @@ pub struct AppState<'app> {
5755

5856
pub fn initialize<'app>(config_path: PathBuf) -> AppState<'app> {
5957
debug!("Initializing state");
60-
let data_dir = get_data_dir();
6158
debug!("Config path: {:?}", config_path);
6259
let config = if config_path.exists() {
6360
debug!("Config exists");
@@ -82,24 +79,28 @@ pub fn initialize<'app>(config_path: PathBuf) -> AppState<'app> {
8279
debug!("No config, using default");
8380
AppConfig::default()
8481
};
82+
AppState::new(config)
83+
}
8584

86-
let tabs = Tabs::default();
87-
88-
let sql_tab_state = SQLTabState::new();
89-
#[cfg(feature = "flightsql")]
90-
let flightsql_tab_state = FlightSQLTabState::new();
91-
let logs_tab_state = LogsTabState::default();
92-
let history_tab_state = HistoryTabState::default();
85+
impl<'app> AppState<'app> {
86+
pub fn new(config: AppConfig) -> Self {
87+
let tabs = Tabs::default();
9388

94-
AppState {
95-
config,
96-
data_dir,
97-
tabs,
98-
sql_tab: sql_tab_state,
89+
let sql_tab_state = SQLTabState::new();
9990
#[cfg(feature = "flightsql")]
100-
flightsql_tab: flightsql_tab_state,
101-
logs_tab: logs_tab_state,
102-
history_tab: history_tab_state,
103-
should_quit: false,
91+
let flightsql_tab_state = FlightSQLTabState::new();
92+
let logs_tab_state = LogsTabState::default();
93+
let history_tab_state = HistoryTabState::default();
94+
95+
AppState {
96+
config,
97+
tabs,
98+
sql_tab: sql_tab_state,
99+
#[cfg(feature = "flightsql")]
100+
flightsql_tab: flightsql_tab_state,
101+
logs_tab: logs_tab_state,
102+
history_tab: history_tab_state,
103+
should_quit: false,
104+
}
104105
}
105106
}

src/execution/mod.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,19 @@ impl ExecutionContext {
5858
/// Construct a new `ExecutionContext` with the specified configuration
5959
pub fn try_new(config: &ExecutionConfig) -> Result<Self> {
6060
let mut builder = DftSessionStateBuilder::new();
61-
for extension in enabled_extensions() {
61+
let extensions = enabled_extensions();
62+
for extension in &extensions {
6263
builder = extension.register(config, builder)?;
6364
}
6465

6566
let state = builder.build()?;
66-
let session_ctx = SessionContext::new_with_state(state);
67+
let mut session_ctx = SessionContext::new_with_state(state);
68+
69+
// Apply any additional setup to the session context (e.g. registering
70+
// functions)
71+
for extension in &extensions {
72+
extension.register_on_ctx(config, &mut session_ctx)?;
73+
}
6774

6875
Ok(Self {
6976
session_ctx,

src/extensions/functions_json.rs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! [datafusion-functions-json] integration: [`JsonFunctionsExtension`]
19+
//!
20+
//! [datafusion-functions-json]: https://github.com/datafusion-contrib/datafusion-functions-json
21+
22+
use crate::config::ExecutionConfig;
23+
use crate::extensions::{DftSessionStateBuilder, Extension};
24+
use datafusion::prelude::SessionContext;
25+
use datafusion_common::Result;
26+
27+
#[derive(Debug, Default)]
28+
pub struct JsonFunctionsExtension {}
29+
30+
impl JsonFunctionsExtension {
31+
pub fn new() -> Self {
32+
Self {}
33+
}
34+
}
35+
36+
impl Extension for JsonFunctionsExtension {
37+
fn register(
38+
&self,
39+
_config: &ExecutionConfig,
40+
builder: DftSessionStateBuilder,
41+
) -> datafusion_common::Result<DftSessionStateBuilder> {
42+
// Nothing to add to the builder: the JSON functions are registered in `register_on_ctx`
43+
Ok(builder)
44+
}
45+
46+
fn register_on_ctx(&self, _config: &ExecutionConfig, ctx: &mut SessionContext) -> Result<()> {
47+
datafusion_functions_json::register_all(ctx)?;
48+
Ok(())
49+
}
50+
}

src/extensions/mod.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,14 @@
1919
2020
use crate::config::ExecutionConfig;
2121
use datafusion::common::Result;
22+
use datafusion::prelude::SessionContext;
2223
use std::fmt::Debug;
2324

2425
mod builder;
2526
#[cfg(feature = "deltalake")]
2627
mod deltalake;
28+
#[cfg(feature = "functions-json")]
29+
mod functions_json;
2730
#[cfg(feature = "s3")]
2831
mod s3;
2932

@@ -36,6 +39,13 @@ pub trait Extension: Debug {
3639
_config: &ExecutionConfig,
3740
_builder: DftSessionStateBuilder,
3841
) -> Result<DftSessionStateBuilder>;
42+
43+
/// Registers this extension after the SessionContext has been created
44+
/// (this is to match the historic way many extensions were registered)
45+
/// TODO file a ticket upstream to use the builder pattern
46+
fn register_on_ctx(&self, _config: &ExecutionConfig, _ctx: &mut SessionContext) -> Result<()> {
47+
Ok(())
48+
}
3949
}
4050

4151
/// Return all extensions currently enabled
@@ -45,5 +55,7 @@ pub fn enabled_extensions() -> Vec<Box<dyn Extension>> {
4555
Box::new(s3::AwsS3Extension::new()),
4656
#[cfg(feature = "deltalake")]
4757
Box::new(deltalake::DeltaLakeExtension::new()),
58+
#[cfg(feature = "functions-json")]
59+
Box::new(functions_json::JsonFunctionsExtension::new()),
4860
]
4961
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Tests for datafusion-functions-json integration
19+
20+
use crate::TestExecution;
21+
22+
static TEST_TABLE: &str = r#"
23+
CREATE TABLE test_table (
24+
id INT,
25+
json_col VARCHAR
26+
) AS VALUES
27+
(1, '{}'),
28+
(2, '{ "a": 1 }'),
29+
(3, '{ "a": 2 }'),
30+
(4, '{ "a": 1, "b": 2 }'),
31+
(5, '{ "a": 1, "b": 2, "c": 3 }')
32+
"#;
33+
34+
/// Ensure that one of the functions, `json_contains`, is properly registered
35+
#[tokio::test]
36+
async fn test_basic() {
37+
let mut execution = TestExecution::new().with_setup(TEST_TABLE).await;
38+
39+
let actual = execution
40+
.run_and_format("SELECT id, json_contains(json_col, 'b') as json_contains FROM test_table")
41+
.await;
42+
43+
insta::assert_yaml_snapshot!(actual, @r###"
44+
- +----+---------------+
45+
- "| id | json_contains |"
46+
- +----+---------------+
47+
- "| 1 | false |"
48+
- "| 2 | false |"
49+
- "| 3 | false |"
50+
- "| 4 | true |"
51+
- "| 5 | true |"
52+
- +----+---------------+
53+
"###);
54+
}
55+
56+
/// Ensure the JSON operators like `->` are properly registered
57+
#[tokio::test]
58+
async fn test_operators() {
59+
let mut execution = TestExecution::new().with_setup(TEST_TABLE).await;
60+
61+
let actual = execution
62+
.run_and_format("SELECT id, json_col->'a' as json_col_a FROM test_table")
63+
.await;
64+
65+
insta::assert_yaml_snapshot!(actual, @r###"
66+
- +----+------------+
67+
- "| id | json_col_a |"
68+
- +----+------------+
69+
- "| 1 | {null=} |"
70+
- "| 2 | {int=1} |"
71+
- "| 3 | {int=2} |"
72+
- "| 4 | {int=1} |"
73+
- "| 5 | {int=1} |"
74+
- +----+------------+
75+
"###);
76+
}

tests/extension_cases/mod.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#[cfg(feature = "functions-json")]
19+
mod functions_json;

0 commit comments

Comments
 (0)