diff --git a/Cargo.toml b/Cargo.toml index e5a7943..c5186c0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ members = ["codegen", "tools/create-data-file", "tools/dump-data-file"] [package] name = "data_bucket" -version = "0.3.14" +version = "0.3.15" edition = "2021" authors = ["Handy-caT"] license = "MIT" diff --git a/codegen/src/persistable/generator/persistable_impl.rs b/codegen/src/persistable/generator/persistable_impl.rs index 873fc28..1337cb6 100644 --- a/codegen/src/persistable/generator/persistable_impl.rs +++ b/codegen/src/persistable/generator/persistable_impl.rs @@ -89,7 +89,7 @@ impl Generator { rkyv::to_bytes::(self).unwrap() } - fn from_bytes(bytes: &[u8]) -> Self { + fn from_bytes(bytes: &[u8], _version: u32) -> Self { let archived = unsafe { rkyv::access_unchecked::<::Archived>(bytes) }; rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid") } @@ -222,7 +222,7 @@ impl Generator { }).collect(); Ok(quote! { - fn from_bytes(bytes: &[u8]) -> Self { + fn from_bytes(bytes: &[u8], _version: u32) -> Self { let mut offset = 0usize; #(#size_defs)* diff --git a/src/page/data.rs b/src/page/data.rs index 3d948c4..daad8b9 100644 --- a/src/page/data.rs +++ b/src/page/data.rs @@ -56,7 +56,7 @@ impl Persistable for DataPage { &self.data[..self.length as usize] } - fn from_bytes(bytes: &[u8]) -> Self { + fn from_bytes(bytes: &[u8], _version: u32) -> Self { let mut data = [0; DATA_LENGTH]; data.copy_from_slice(bytes); Self { diff --git a/src/page/header.rs b/src/page/header.rs index a7bf4a0..424e25d 100644 --- a/src/page/header.rs +++ b/src/page/header.rs @@ -9,7 +9,7 @@ use crate::space; use crate::util::Persistable; use crate::PAGE_SIZE; -pub const DATA_VERSION: u32 = 1u32; +pub const DATA_VERSION: u32 = 2u32; /// Header that appears on every page before it's inner data. #[derive( @@ -107,7 +107,7 @@ mod test { #[test] fn test_data_version() { let header = GeneralHeader::new(1.into(), PageType::Empty, 2.into()); - assert_eq!(header.data_version, 1u32); + assert_eq!(header.data_version, DATA_VERSION); } #[test] diff --git a/src/page/index/page.rs b/src/page/index/page.rs index ece7286..974c6d7 100644 --- a/src/page/index/page.rs +++ b/src/page/index/page.rs @@ -108,7 +108,7 @@ where .await?; let mut index_utility_bytes = vec![0u8; index_utility_len]; file.read_exact(index_utility_bytes.as_mut_slice()).await?; - let utility = SizedIndexPageUtility::::from_bytes(&index_utility_bytes); + let utility = SizedIndexPageUtility::::from_bytes(&index_utility_bytes, 0); Ok(utility) } @@ -314,7 +314,7 @@ mod tests { size, ); let bytes = page.as_bytes(); - let new_page = IndexPage::::from_bytes(bytes.as_ref()); + let new_page = IndexPage::::from_bytes(bytes.as_ref(), 0); assert_eq!(new_page.node_id, page.node_id); assert_eq!(new_page.current_index, page.current_index); @@ -335,7 +335,7 @@ mod tests { size, ); let bytes = page.as_bytes(); - let new_page = IndexPage::::from_bytes(bytes.as_ref()); + let new_page = IndexPage::::from_bytes(bytes.as_ref(), 0); assert_eq!(new_page.node_id, page.node_id); assert_eq!(new_page.current_index, page.current_index); @@ -355,7 +355,7 @@ mod tests { size, ); let bytes = page.as_bytes(); - let new_page = IndexPage::::from_bytes(bytes.as_ref()); + let new_page = IndexPage::::from_bytes(bytes.as_ref(), 0); assert_eq!(new_page.node_id, page.node_id); assert_eq!(new_page.current_index, page.current_index); diff --git a/src/page/index/page_for_unsized.rs b/src/page/index/page_for_unsized.rs index d77e54e..e7d657d 100644 --- a/src/page/index/page_for_unsized.rs +++ b/src/page/index/page_for_unsized.rs @@ -106,7 +106,7 @@ where .await?; let mut index_utility_bytes = vec![0u8; index_utility_len]; file.read_exact(index_utility_bytes.as_mut_slice()).await?; - let utility = UnsizedIndexPageUtility::::from_bytes(&index_utility_bytes); + let utility = UnsizedIndexPageUtility::::from_bytes(&index_utility_bytes, 0); Ok(utility) } @@ -304,7 +304,7 @@ where bytes } - fn from_bytes(bytes: &[u8]) -> Self { + fn from_bytes(bytes: &[u8], _version: u32) -> Self { let slots_size_bytes = &bytes[0..UnsizedIndexPageUtility::::slots_size_size()]; let archived = unsafe { rkyv::access_unchecked::<::Archived>(slots_size_bytes) }; @@ -321,7 +321,7 @@ where slots_size as usize, node_id_size as usize, ); - let utility = UnsizedIndexPageUtility::::from_bytes(&bytes[0..utility_len]); + let utility = UnsizedIndexPageUtility::::from_bytes(&bytes[0..utility_len], _version); let mut index_values = Vec::with_capacity(utility.slots.len()); for (offset, len) in &utility.slots { let offset = bytes.len() - *offset as usize; @@ -360,7 +360,7 @@ mod test { .unwrap(); let bytes = page.as_bytes(); assert_eq!(bytes.as_ref().len(), 1024); - let page_back = UnsizedIndexPage::from_bytes(bytes.as_ref()); + let page_back = UnsizedIndexPage::from_bytes(bytes.as_ref(), 0); assert_eq!(page_back, page) } diff --git a/src/page/index/table_of_contents_page.rs b/src/page/index/table_of_contents_page.rs index 392dbb9..5488d19 100644 --- a/src/page/index/table_of_contents_page.rs +++ b/src/page/index/table_of_contents_page.rs @@ -63,7 +63,7 @@ where }; rkyv::to_bytes::(&model).unwrap() } - fn from_bytes(bytes: &[u8]) -> Self { + fn from_bytes(bytes: &[u8], _version: u32) -> Self { let archived = unsafe { rkyv::access_unchecked::< as Archive>::Archived>(bytes) }; diff --git a/src/page/space_info.rs b/src/page/space_info.rs index ff075ef..fb09dd8 100644 --- a/src/page/space_info.rs +++ b/src/page/space_info.rs @@ -8,22 +8,145 @@ use rkyv::{Archive, Deserialize, Serialize}; pub type SpaceName = String; +/// Legacy SpaceInfoPage format (version 1) - no version field. +/// Used for migrating existing data files. +#[derive(Archive, Clone, Deserialize, Debug, PartialEq, Serialize, Persistable)] +pub(crate) struct SpaceInfoPageV1 { + pub id: space::Id, + pub page_count: u32, + pub pk_gen_state: Pk, + pub name: SpaceName, + pub row_schema: Vec<(String, String)>, + pub primary_key_fields: Vec, + pub secondary_index_types: Vec<(String, String)>, + pub empty_links_list: Vec, +} + +/// Current SpaceInfoPage format (version 2+) - with version field. +/// Internal struct for serialization, converted to public SpaceInfoPage. +#[derive(Archive, Clone, Deserialize, Debug, PartialEq, Serialize, Persistable)] +pub(crate) struct SpaceInfoPageV2 { + pub id: space::Id, + pub page_count: u32, + pub pk_gen_state: Pk, + pub name: SpaceName, + pub version: u32, + pub row_schema: Vec<(String, String)>, + pub primary_key_fields: Vec, + pub secondary_index_types: Vec<(String, String)>, + pub empty_links_list: Vec, +} + +impl From> for SpaceInfoPage { + fn from(v1: SpaceInfoPageV1) -> Self { + SpaceInfoPage { + version: 0, + id: v1.id, + page_count: v1.page_count, + pk_gen_state: v1.pk_gen_state, + name: v1.name, + row_schema: v1.row_schema, + primary_key_fields: v1.primary_key_fields, + secondary_index_types: v1.secondary_index_types, + empty_links_list: v1.empty_links_list, + } + } +} + +impl From> for SpaceInfoPage { + fn from(v2: SpaceInfoPageV2) -> Self { + SpaceInfoPage { + version: v2.version, + id: v2.id, + page_count: v2.page_count, + pk_gen_state: v2.pk_gen_state, + name: v2.name, + row_schema: v2.row_schema, + primary_key_fields: v2.primary_key_fields, + secondary_index_types: v2.secondary_index_types, + empty_links_list: v2.empty_links_list, + } + } +} + +impl From> for SpaceInfoPageV2 { + fn from(page: SpaceInfoPage) -> Self { + SpaceInfoPageV2 { + version: page.version, + id: page.id, + page_count: page.page_count, + pk_gen_state: page.pk_gen_state, + name: page.name, + row_schema: page.row_schema, + primary_key_fields: page.primary_key_fields, + secondary_index_types: page.secondary_index_types, + empty_links_list: page.empty_links_list, + } + } +} + // TODO: Minor. Add some schema description in `SpaceIndo` /// Internal information about a `Space`. Always appears first before all other /// pages in a `Space`. -#[derive(Archive, Clone, Deserialize, Debug, PartialEq, Serialize, Persistable)] +#[derive(Archive, Clone, Deserialize, Debug, PartialEq, Serialize)] pub struct SpaceInfoPage { pub id: space::Id, pub page_count: u32, pub pk_gen_state: Pk, pub name: SpaceName, + pub version: u32, pub row_schema: Vec<(String, String)>, pub primary_key_fields: Vec, pub secondary_index_types: Vec<(String, String)>, pub empty_links_list: Vec, } +impl Persistable for SpaceInfoPage +where + Pk: Archive + + Clone + + for<'a> rkyv::Serialize< + rkyv::rancor::Strategy< + rkyv::ser::Serializer< + rkyv::util::AlignedVec, + rkyv::ser::allocator::ArenaHandle<'a>, + rkyv::ser::sharing::Share, + >, + rkyv::rancor::Error, + >, + >, + ::Archived: rkyv::Deserialize>, +{ + fn as_bytes(&self) -> impl AsRef<[u8]> + Send { + let v2 = SpaceInfoPageV2 { + version: self.version, + id: self.id, + page_count: self.page_count, + pk_gen_state: self.pk_gen_state.clone(), + name: self.name.clone(), + row_schema: self.row_schema.clone(), + primary_key_fields: self.primary_key_fields.clone(), + secondary_index_types: self.secondary_index_types.clone(), + empty_links_list: self.empty_links_list.clone(), + }; + rkyv::to_bytes::(&v2).unwrap() + } + + fn from_bytes(bytes: &[u8], version: u32) -> Self { + match version { + 1 => { + let v1 = SpaceInfoPageV1::from_bytes(bytes, version); + v1.into() + } + _ => { + let v2 = SpaceInfoPageV2::from_bytes(bytes, version); + v2.into() + } + } + } +} + /// Represents some interval between values. #[derive(Archive, Clone, Deserialize, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)] pub struct Interval(pub usize, pub usize); @@ -36,22 +159,137 @@ impl Interval { #[cfg(test)] mod test { - use crate::page::{SpaceInfoPage, INNER_PAGE_SIZE}; + use super::{SpaceInfoPage, SpaceInfoPageV1, SpaceInfoPageV2}; + use crate::page::INNER_PAGE_SIZE; use crate::util::Persistable; + use rkyv::Archive; #[test] fn test_as_bytes() { - let info = SpaceInfoPage { + let info: SpaceInfoPage = SpaceInfoPage { id: 0.into(), page_count: 0, name: "Test".to_string(), + version: 1, row_schema: vec![], primary_key_fields: vec![], - pk_gen_state: 0u128, + pk_gen_state: (), empty_links_list: vec![], secondary_index_types: vec![], }; let bytes = info.as_bytes(); assert!(bytes.as_ref().len() < INNER_PAGE_SIZE) } -} + + #[test] + fn test_migration_from_v1() { + let old_info: SpaceInfoPageV1 = SpaceInfoPageV1 { + id: 42.into(), + page_count: 5, + pk_gen_state: (), + name: "legacy_table".to_string(), + row_schema: vec![("col1".to_string(), "i32".to_string())], + primary_key_fields: vec!["col1".to_string()], + secondary_index_types: vec![], + empty_links_list: vec![], + }; + + let migrated: SpaceInfoPage = old_info.into(); + + assert_eq!(migrated.version, 0); + assert_eq!(migrated.id, 42.into()); + assert_eq!(migrated.page_count, 5); + assert_eq!(migrated.name, "legacy_table"); + assert_eq!(migrated.row_schema, vec![("col1".to_string(), "i32".to_string())]); + assert_eq!(migrated.primary_key_fields, vec!["col1".to_string()]); + } + + #[test] + fn test_v1_bytes_roundtrip_and_migration() { + let old_info: SpaceInfoPageV1 = SpaceInfoPageV1 { + id: 10.into(), + page_count: 3, + pk_gen_state: (), + name: "old_data".to_string(), + row_schema: vec![], + primary_key_fields: vec![], + secondary_index_types: vec![], + empty_links_list: vec![], + }; + let bytes = old_info.as_bytes(); + + let archived = unsafe { + rkyv::access_unchecked::<::Archived>(bytes.as_ref()) + }; + let deserialized: SpaceInfoPageV1 = + rkyv::deserialize::<_, rkyv::rancor::Error>(archived).unwrap(); + + let migrated: SpaceInfoPage = deserialized.into(); + + assert_eq!(migrated.version, 0); + assert_eq!(migrated.id, 10.into()); + assert_eq!(migrated.name, "old_data"); + } + + #[test] + fn test_v2_bytes_roundtrip() { + let info: SpaceInfoPageV2 = SpaceInfoPageV2 { + id: 20.into(), + page_count: 7, + pk_gen_state: (), + name: "new_table".to_string(), + version: 2, + row_schema: vec![("col2".to_string(), "String".to_string())], + primary_key_fields: vec!["col2".to_string()], + secondary_index_types: vec![], + empty_links_list: vec![], + }; + let bytes = info.as_bytes(); + + let archived = unsafe { + rkyv::access_unchecked::<::Archived>(bytes.as_ref()) + }; + let deserialized: SpaceInfoPageV2 = + rkyv::deserialize::<_, rkyv::rancor::Error>(archived).unwrap(); + + assert_eq!(deserialized.version, 2); + assert_eq!(deserialized.id, 20.into()); + assert_eq!(deserialized.name, "new_table"); + } + + #[test] + fn test_persistable_version_handling() { + let v1_info: SpaceInfoPageV1 = SpaceInfoPageV1 { + id: 100.into(), + page_count: 10, + pk_gen_state: (), + name: "v1_table".to_string(), + row_schema: vec![], + primary_key_fields: vec![], + secondary_index_types: vec![], + empty_links_list: vec![], + }; + let v1_bytes = v1_info.as_bytes(); + + let page_from_v1: SpaceInfoPage = SpaceInfoPage::from_bytes(v1_bytes.as_ref(), 1); + assert_eq!(page_from_v1.version, 0); + assert_eq!(page_from_v1.id, 100.into()); + + let v2_info: SpaceInfoPageV2 = SpaceInfoPageV2 { + id: 200.into(), + page_count: 20, + pk_gen_state: (), + name: "v2_table".to_string(), + version: 5, + row_schema: vec![], + primary_key_fields: vec![], + secondary_index_types: vec![], + empty_links_list: vec![], + }; + let v2_bytes = v2_info.as_bytes(); + + let page_from_v2: SpaceInfoPage = SpaceInfoPage::from_bytes(v2_bytes.as_ref(), 2); + assert_eq!(page_from_v2.version, 5); + assert_eq!(page_from_v2.id, 200.into()); + } +} \ No newline at end of file diff --git a/src/page/util.rs b/src/page/util.rs index 990a3ae..654f291 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -186,7 +186,7 @@ where let mut buffer: Vec = vec![0u8; length as usize]; file.read_exact(&mut buffer).await?; - let info = Page::from_bytes(buffer.as_ref()); + let info = Page::from_bytes(buffer.as_ref(), header.data_version); Ok(GeneralPage { header, @@ -319,12 +319,8 @@ pub async fn parse_space_info( let mut buffer = vec![0u8; header.data_length as usize]; file.read_exact(&mut buffer).await?; - let archived = - unsafe { rkyv::access_unchecked::<::Archived>(&buffer[..]) }; - let space_info: SpaceInfoPage = - rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid"); - Ok(space_info) + Ok(SpaceInfoPage::from_bytes(&buffer, header.data_version)) } // pub fn read_index_pages( diff --git a/src/util/persistable.rs b/src/util/persistable.rs index 8fb5620..951caaf 100644 --- a/src/util/persistable.rs +++ b/src/util/persistable.rs @@ -10,7 +10,7 @@ use rkyv::{Archive, Deserialize, Serialize}; pub trait Persistable { fn as_bytes(&self) -> impl AsRef<[u8]> + Send; - fn from_bytes(bytes: &[u8]) -> Self; + fn from_bytes(bytes: &[u8], version: u32) -> Self; } impl Persistable for Vec @@ -27,7 +27,7 @@ where rkyv::to_bytes::(self).unwrap() } - fn from_bytes(bytes: &[u8]) -> Self { + fn from_bytes(bytes: &[u8], _version: u32) -> Self { let archived = unsafe { rkyv::access_unchecked::<::Archived>(bytes) }; rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid") } @@ -38,7 +38,7 @@ impl Persistable for u8 { rkyv::to_bytes::(self).unwrap() } - fn from_bytes(bytes: &[u8]) -> Self { + fn from_bytes(bytes: &[u8], _version: u32) -> Self { let archived = unsafe { rkyv::access_unchecked::<::Archived>(bytes) }; rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid") } @@ -49,7 +49,7 @@ impl Persistable for String { rkyv::to_bytes::(self).unwrap() } - fn from_bytes(bytes: &[u8]) -> Self { + fn from_bytes(bytes: &[u8], _version: u32) -> Self { let archived = unsafe { rkyv::access_unchecked::<::Archived>(bytes) }; rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid") }