1
1
use std:: ffi:: { c_int, c_void, CStr } ;
2
- use std:: fs:: { File , OpenOptions } ;
2
+ use std:: fs:: { remove_dir_all , File , OpenOptions } ;
3
3
use std:: io:: Write ;
4
4
use std:: mem:: size_of;
5
5
use std:: os:: unix:: prelude:: FileExt ;
@@ -10,25 +10,36 @@ use anyhow::{bail, ensure};
10
10
use bytemuck:: { bytes_of, pod_read_unaligned, Pod , Zeroable } ;
11
11
use bytes:: { Bytes , BytesMut } ;
12
12
use parking_lot:: RwLock ;
13
- use rusqlite:: ffi:: SQLITE_IOERR ;
14
13
use sqld_libsql_bindings:: init_static_wal_method;
15
14
use tokio:: sync:: watch;
16
15
use uuid:: Uuid ;
17
16
17
+ #[ cfg( feature = "bottomless" ) ]
18
+ use crate :: libsql:: ffi:: SQLITE_IOERR_WRITE ;
18
19
use crate :: libsql:: ffi:: {
19
20
sqlite3,
20
21
types:: { XWalCheckpointFn , XWalFrameFn , XWalSavePointUndoFn , XWalUndoFn } ,
21
- PgHdr , Wal , SQLITE_OK ,
22
+ PageHdrIter , PgHdr , Wal , SQLITE_CHECKPOINT_TRUNCATE , SQLITE_IOERR , SQLITE_OK ,
22
23
} ;
23
- #[ cfg( feature = "bottomless" ) ]
24
- use crate :: libsql:: ffi:: { SQLITE_CHECKPOINT_TRUNCATE , SQLITE_IOERR_WRITE } ;
25
- use crate :: libsql:: { ffi:: PageHdrIter , wal_hook:: WalHook } ;
24
+ use crate :: libsql:: wal_hook:: WalHook ;
26
25
use crate :: replication:: frame:: { Frame , FrameHeader } ;
27
26
use crate :: replication:: snapshot:: { find_snapshot_file, LogCompactor , SnapshotFile } ;
28
27
use crate :: replication:: { FrameNo , CRC_64_GO_ISO , WAL_MAGIC , WAL_PAGE_SIZE } ;
29
28
30
29
init_static_wal_method ! ( REPLICATION_METHODS , ReplicationLoggerHook ) ;
31
30
31
+ #[ derive( PartialEq , Eq ) ]
32
+ struct Version ( [ u16 ; 4 ] ) ;
33
+
34
+ impl Version {
35
+ fn current ( ) -> Self {
36
+ let major = env ! ( "CARGO_PKG_VERSION_MAJOR" ) . parse ( ) . unwrap ( ) ;
37
+ let minor = env ! ( "CARGO_PKG_VERSION_MINOR" ) . parse ( ) . unwrap ( ) ;
38
+ let patch = env ! ( "CARGO_PKG_VERSION_PATCH" ) . parse ( ) . unwrap ( ) ;
39
+ Self ( [ 0 , major, minor, patch] )
40
+ }
41
+ }
42
+
32
43
pub enum ReplicationLoggerHook { }
33
44
34
45
#[ derive( Clone ) ]
@@ -378,14 +389,14 @@ impl LogFile {
378
389
if file_end == 0 {
379
390
let db_id = Uuid :: new_v4 ( ) ;
380
391
let header = LogFileHeader {
381
- version : 1 ,
392
+ version : 2 ,
382
393
start_frame_no : 0 ,
383
394
magic : WAL_MAGIC ,
384
395
page_size : WAL_PAGE_SIZE ,
385
396
start_checksum : 0 ,
386
397
db_id : db_id. as_u128 ( ) ,
387
398
frame_count : 0 ,
388
- _pad : 0 ,
399
+ sqld_version : Version :: current ( ) . 0 ,
389
400
} ;
390
401
391
402
let mut this = Self {
@@ -623,6 +634,13 @@ impl LogFile {
623
634
Some ( self . header . start_frame_no + self . header . frame_count - 1 )
624
635
}
625
636
}
637
+
638
+ fn reset ( self ) -> anyhow:: Result < Self > {
639
+ let max_log_frame_count = self . max_log_frame_count ;
640
+ // truncate file
641
+ self . file . set_len ( 0 ) ?;
642
+ Self :: new ( self . file , max_log_frame_count)
643
+ }
626
644
}
627
645
628
646
#[ cfg( target_os = "macos" ) ]
@@ -680,17 +698,22 @@ pub struct LogFileHeader {
680
698
pub start_frame_no : FrameNo ,
681
699
/// entry count in file
682
700
pub frame_count : u64 ,
683
- /// Wal file version number, currently: 1
701
+ /// Wal file version number, currently: 2
684
702
pub version : u32 ,
685
703
/// page size: 4096
686
704
pub page_size : i32 ,
687
- pub _pad : u64 ,
705
+ /// sqld version when creating this log
706
+ pub sqld_version : [ u16 ; 4 ] ,
688
707
}
689
708
690
709
impl LogFileHeader {
691
710
pub fn last_frame_no ( & self ) -> FrameNo {
692
711
self . start_frame_no + self . frame_count
693
712
}
713
+
714
+ fn sqld_version ( & self ) -> Version {
715
+ Version ( self . sqld_version )
716
+ }
694
717
}
695
718
696
719
pub struct Generation {
@@ -720,28 +743,87 @@ pub struct ReplicationLogger {
720
743
impl ReplicationLogger {
721
744
pub fn open ( db_path : & Path , max_log_size : u64 ) -> anyhow:: Result < Self > {
722
745
let log_path = db_path. join ( "wallog" ) ;
746
+ let data_path = db_path. join ( "data" ) ;
747
+
748
+ let fresh = !log_path. exists ( ) ;
749
+
723
750
let file = OpenOptions :: new ( )
724
751
. create ( true )
725
752
. write ( true )
726
753
. read ( true )
727
754
. open ( log_path) ?;
755
+
728
756
let max_log_frame_count = max_log_size * 1_000_000 / LogFile :: FRAME_SIZE as u64 ;
729
757
let log_file = LogFile :: new ( file, max_log_frame_count) ?;
758
+ let header = log_file. header ( ) ;
759
+
760
+ let should_recover = if header. version < 2 || header. sqld_version ( ) != Version :: current ( ) {
761
+ tracing:: info!( "replication log version not compatible with current sqld version, recovering from database file." ) ;
762
+ true
763
+ } else if fresh && data_path. exists ( ) {
764
+ tracing:: info!( "replication log not found, recovering from database file." ) ;
765
+ true
766
+ } else {
767
+ false
768
+ } ;
769
+
770
+ if should_recover {
771
+ Self :: recover ( log_file, data_path)
772
+ } else {
773
+ Self :: from_log_file ( db_path. to_path_buf ( ) , log_file)
774
+ }
775
+ }
730
776
731
- let header = log_file. header ;
777
+ fn from_log_file ( db_path : PathBuf , log_file : LogFile ) -> anyhow:: Result < Self > {
778
+ let header = log_file. header ( ) ;
732
779
let generation_start_frame_no = header. start_frame_no + header. frame_count ;
733
780
734
781
let ( new_frame_notifier, _) = watch:: channel ( generation_start_frame_no) ;
735
782
736
783
Ok ( Self {
737
784
generation : Generation :: new ( generation_start_frame_no) ,
738
- compactor : LogCompactor :: new ( db_path, log_file. header . db_id ) ?,
785
+ compactor : LogCompactor :: new ( & db_path, log_file. header . db_id ) ?,
739
786
log_file : RwLock :: new ( log_file) ,
740
- db_path : db_path . to_owned ( ) ,
787
+ db_path,
741
788
new_frame_notifier,
742
789
} )
743
790
}
744
791
792
+ fn recover ( log_file : LogFile , mut data_path : PathBuf ) -> anyhow:: Result < Self > {
793
+ // It is necessary to checkpoint before we restore the replication log, since the WAL may
794
+ // contain pages that are not in the database file.
795
+ checkpoint_db ( & data_path) ?;
796
+ let mut log_file = log_file. reset ( ) ?;
797
+ let snapshot_path = data_path. parent ( ) . unwrap ( ) . join ( "snapshots" ) ;
798
+ // best effort, there may be no snapshots
799
+ let _ = remove_dir_all ( snapshot_path) ;
800
+
801
+ let data_file = File :: open ( & data_path) ?;
802
+ let size = data_path. metadata ( ) ?. len ( ) ;
803
+ assert ! (
804
+ size % WAL_PAGE_SIZE as u64 == 0 ,
805
+ "database file size is not a multiple of page size"
806
+ ) ;
807
+ let num_page = size / WAL_PAGE_SIZE as u64 ;
808
+ let mut buf = [ 0 ; WAL_PAGE_SIZE as usize ] ;
809
+ let mut page_no = 1 ; // page numbering starts at 1
810
+ for i in 0 ..num_page {
811
+ data_file. read_exact_at ( & mut buf, i * WAL_PAGE_SIZE as u64 ) ?;
812
+ log_file. push_page ( & WalPage {
813
+ page_no,
814
+ size_after : if i == num_page - 1 { num_page as _ } else { 0 } ,
815
+ data : Bytes :: copy_from_slice ( & buf) ,
816
+ } ) ?;
817
+ log_file. commit ( ) ?;
818
+
819
+ page_no += 1 ;
820
+ }
821
+
822
+ assert ! ( data_path. pop( ) ) ;
823
+
824
+ Self :: from_log_file ( data_path, log_file)
825
+ }
826
+
745
827
pub fn database_id ( & self ) -> anyhow:: Result < Uuid > {
746
828
Ok ( Uuid :: from_u128 ( ( self . log_file . read ( ) ) . header ( ) . db_id ) )
747
829
}
@@ -790,6 +872,39 @@ impl ReplicationLogger {
790
872
}
791
873
}
792
874
875
+ fn checkpoint_db ( data_path : & Path ) -> anyhow:: Result < ( ) > {
876
+ unsafe {
877
+ let conn = rusqlite:: Connection :: open ( data_path) ?;
878
+ conn. pragma_query ( None , "page_size" , |row| {
879
+ let page_size = row. get :: < _ , i32 > ( 0 ) . unwrap ( ) ;
880
+ assert_eq ! (
881
+ page_size, WAL_PAGE_SIZE ,
882
+ "invalid database file, expected page size to be {}, but found {} instead" ,
883
+ WAL_PAGE_SIZE , page_size
884
+ ) ;
885
+ Ok ( ( ) )
886
+ } ) ?;
887
+ let mut num_checkpointed: c_int = 0 ;
888
+ let rc = rusqlite:: ffi:: sqlite3_wal_checkpoint_v2 (
889
+ conn. handle ( ) ,
890
+ std:: ptr:: null ( ) ,
891
+ SQLITE_CHECKPOINT_TRUNCATE ,
892
+ & mut num_checkpointed as * mut _ ,
893
+ std:: ptr:: null_mut ( ) ,
894
+ ) ;
895
+
896
+ // TODO: ensure correct page size
897
+ ensure ! (
898
+ rc == 0 && num_checkpointed >= 0 ,
899
+ "failed to checkpoint database while recovering replication log"
900
+ ) ;
901
+
902
+ conn. execute ( "VACUUM" , ( ) ) ?;
903
+ }
904
+
905
+ Ok ( ( ) )
906
+ }
907
+
793
908
#[ cfg( test) ]
794
909
mod test {
795
910
use super :: * ;
0 commit comments