diff --git a/Cargo.toml b/Cargo.toml index 49703b47..b72ea04e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,8 +25,8 @@ lockfree = { version = "0.5.1" } worktable_codegen = { path = "codegen", version = "0.5.5" } futures = "0.3.30" uuid = { version = "1.10.0", features = ["v4"] } -data_bucket = "0.2.3" -# data_bucket = { git = "https://github.com/pathscale/DataBucket", branch = "indexset-version-update" } +#data_bucket = "0.2.3" +data_bucket = { git = "https://github.com/pathscale/DataBucket", branch = "main" } # data_bucket = { path = "../DataBucket", version = "0.2.2" } performance_measurement_codegen = { path = "performance_measurement/codegen", version = "0.1.0", optional = true } performance_measurement = { path = "performance_measurement", version = "0.1.0", optional = true } diff --git a/codegen/src/persist_index/generator.rs b/codegen/src/persist_index/generator.rs index 34afe1e2..d4276f4d 100644 --- a/codegen/src/persist_index/generator.rs +++ b/codegen/src/persist_index/generator.rs @@ -393,7 +393,7 @@ impl Generator { }; quote! { let size = get_index_page_size_from_data_length::<#ty>(#const_name); - let #i: #t<_, Link> = #t::new(); + let #i: #t<_, Link> = #t::with_maximum_node_size(size); for page in persisted.#i.1 { #node } diff --git a/codegen/src/persist_table/generator/space_file/mod.rs b/codegen/src/persist_table/generator/space_file/mod.rs index 34f39064..f2d6136d 100644 --- a/codegen/src/persist_table/generator/space_file/mod.rs +++ b/codegen/src/persist_table/generator/space_file/mod.rs @@ -154,11 +154,12 @@ impl Generator { let engine_ident = name_generator.get_persistence_engine_ident(); let dir_name = name_generator.get_dir_name(); let const_name = name_generator.get_page_inner_size_const_ident(); + let pk_type = name_generator.get_primary_key_type_ident(); let pk_map = if self.attributes.pk_unsized { let pk_ident = &self.pk_ident; quote! 
{ - let pk_map = IndexMap::<#pk_ident, Link, UnsizedNode<_>>::new(); + let pk_map = IndexMap::<#pk_ident, Link, UnsizedNode<_>>::with_maximum_node_size(#const_name); for page in self.primary_index.1 { let node = page.inner.get_node(); pk_map.attach_node(UnsizedNode::from_inner(node, #const_name)); @@ -166,7 +167,8 @@ impl Generator { } } else { quote! { - let pk_map = IndexMap::new(); + let size = get_index_page_size_from_data_length::<#pk_type>(#const_name); + let pk_map = IndexMap::with_maximum_node_size(size); for page in self.primary_index.1 { let node = page.inner.get_node(); pk_map.attach_node(node); diff --git a/codegen/src/worktable/generator/index.rs b/codegen/src/worktable/generator/index.rs index ce2afa3c..dbb47e27 100644 --- a/codegen/src/worktable/generator/index.rs +++ b/codegen/src/worktable/generator/index.rs @@ -15,11 +15,13 @@ impl Generator { } else { quote! {} }; + let default_impl = self.gen_index_default_impl(); quote! { #type_def #impl_def #cdc_impl_def + #default_impl } } @@ -63,11 +65,11 @@ impl Generator { let derive = if self.is_persist { quote! { - #[derive(Debug, MemStat, Default, PersistIndex)] + #[derive(Debug, MemStat, PersistIndex)] } } else { quote! { - #[derive(Debug, MemStat, Default)] + #[derive(Debug, MemStat)] } }; @@ -79,6 +81,54 @@ impl Generator { } } + fn gen_index_default_impl(&self) -> TokenStream { + let name_generator = WorktableNameGenerator::from_table_name(self.name.to_string()); + let index_type_ident = name_generator.get_index_type_ident(); + let const_name = name_generator.get_page_inner_size_const_ident(); + + let index_rows = self + .columns + .indexes + .iter() + .map(|(i, idx)| { + let t = self.columns.columns_map.get(i).unwrap(); + let t = if is_float(t.to_string().as_str()) { + quote! { OrderedFloat<#t> } + } else { + quote! { #t } + }; + let i = &idx.name; + + #[allow(clippy::collapsible_else_if)] + if idx.is_unique { + if is_unsized(&t.to_string()) { + quote! 
{ + #i: IndexMap::with_maximum_node_size(#const_name), + } + } else { + quote! {#i: IndexMap::with_maximum_node_size(get_index_page_size_from_data_length::<#t>(#const_name)),} + } + } else { + if is_unsized(&t.to_string()) { + quote! {#i: IndexMultiMap::with_maximum_node_size(#const_name), } + } else { + quote! {#i: IndexMultiMap::with_maximum_node_size(get_index_page_size_from_data_length::<#t>(#const_name)),} + } + } + }) + .collect::<Vec<_>>(); + + quote! { + impl Default for #index_type_ident { + fn default() -> Self { + Self { + #(#index_rows)* + } + } + } + } + } + /// Generates implementation of `TableSecondaryIndex` trait for index. fn gen_impl_def(&mut self) -> TokenStream { let name_generator = WorktableNameGenerator::from_table_name(self.name.to_string()); diff --git a/codegen/src/worktable/generator/table/impls.rs b/codegen/src/worktable/generator/table/impls.rs index 29c20d19..2c3c6dac 100644 --- a/codegen/src/worktable/generator/table/impls.rs +++ b/codegen/src/worktable/generator/table/impls.rs @@ -43,12 +43,16 @@ impl Generator { let engine = name_generator.get_persistence_engine_ident(); let task = name_generator.get_persistence_task_ident(); let dir_name = name_generator.get_dir_name(); + let pk_type = name_generator.get_primary_key_type_ident(); + let const_name = name_generator.get_page_inner_size_const_ident(); if self.is_persist { quote! 
{ pub async fn new(config: PersistenceConfig) -> eyre::Result<Self> { let mut inner = WorkTable::default(); inner.table_name = #table_name; + let size = get_index_page_size_from_data_length::<#pk_type>(#const_name); + inner.pk_map = IndexMap::with_maximum_node_size(size); let table_files_path = format!("{}/{}", config.tables_path, #dir_name); let engine: #engine = PersistenceEngine::from_table_files_path(table_files_path).await?; core::result::Result::Ok(Self( diff --git a/tests/persistence/sync/string_re_read.rs b/tests/persistence/sync/string_re_read.rs index 3abe7bae..ea21aa09 100644 --- a/tests/persistence/sync/string_re_read.rs +++ b/tests/persistence/sync/string_re_read.rs @@ -81,3 +81,59 @@ fn test_key() { } }) } + +#[test] +fn test_big_amount_reread() { + let config = PersistenceConfig::new("tests/data/key_big_amount", "tests/data/key_big_amount"); + + let runtime = tokio::runtime::Builder::new_multi_thread() + .worker_threads(2) + .enable_io() + .enable_time() + .build() + .unwrap(); + + runtime.block_on(async { + remove_dir_if_exists("tests/data/key_big_amount".to_string()).await; + + { + let table = StringReReadWorkTable::load_from_file(config.clone()) + .await + .unwrap(); + for i in 0..1000 { + table + .insert(StringReReadRow { + first: format!("first_{}", i % 100), + id: table.get_next_pk().into(), + third: format!("third_{}", i), + second: format!("second_{}", i), + last: format!("_________________________last_____________________{}", i), + }) + .unwrap(); + } + + table.wait_for_ops().await + } + { + let table = StringReReadWorkTable::load_from_file(config.clone()) + .await + .unwrap(); + table + .insert(StringReReadRow { + first: "first_last".to_string(), + id: table.get_next_pk().into(), + third: "third_last".to_string(), + second: "second_last".to_string(), + last: "_________________________last_____________________".to_string(), + }) + .unwrap(); + table.wait_for_ops().await + } + { + let table = StringReReadWorkTable::load_from_file(config.clone()) 
.await + .unwrap(); + assert_eq!(table.select_all().execute().unwrap().len(), 1001); + } + }) +}