@@ -39,11 +39,18 @@ use oid;
3939
4040use serde:: de:: Deserialize ;
4141
42- fn read_string < R : Read + ?Sized > ( reader : & mut R ) -> DecoderResult < String > {
42+ fn read_string < R : Read + ?Sized > ( reader : & mut R , utf8_lossy : bool ) -> DecoderResult < String > {
4343 let len = try!( reader. read_i32 :: < LittleEndian > ( ) ) ;
4444
45- let mut s = String :: with_capacity ( len as usize - 1 ) ;
46- try!( reader. take ( len as u64 - 1 ) . read_to_string ( & mut s) ) ;
45+ let s = if utf8_lossy {
46+ let mut buf = Vec :: with_capacity ( len as usize - 1 ) ;
47+ try!( reader. take ( len as u64 - 1 ) . read_to_end ( & mut buf) ) ;
48+ String :: from_utf8_lossy ( & buf) . to_string ( )
49+ } else {
50+ let mut s = String :: with_capacity ( len as usize - 1 ) ;
51+ try!( reader. take ( len as u64 - 1 ) . read_to_string ( & mut s) ) ;
52+ s
53+ } ;
4754 try!( reader. read_u8 ( ) ) ; // The last 0x00
4855
4956 Ok ( s)
@@ -88,15 +95,38 @@ pub fn decode_document<R: Read + ?Sized>(reader: &mut R) -> DecoderResult<Docume
8895 }
8996
9097 let key = try!( read_cstring ( reader) ) ;
91- let val = try!( decode_bson ( reader, tag) ) ;
98+ let val = try!( decode_bson ( reader, tag, false ) ) ;
9299
93100 doc. insert ( key, val) ;
94101 }
95102
96103 Ok ( doc)
97104}
98105
99- fn decode_array < R : Read + ?Sized > ( reader : & mut R ) -> DecoderResult < Array > {
106+ /// Attempt to decode a `Document` that may contain invalid UTF-8 strings from a byte stream.
107+ pub fn decode_document_utf8_lossy < R : Read + ?Sized > ( reader : & mut R ) -> DecoderResult < Document > {
108+ let mut doc = Document :: new ( ) ;
109+
110+ // disregard the length: using Read::take causes infinite type recursion
111+ try!( read_i32 ( reader) ) ;
112+
113+ loop {
114+ let tag = try!( reader. read_u8 ( ) ) ;
115+
116+ if tag == 0 {
117+ break ;
118+ }
119+
120+ let key = try!( read_cstring ( reader) ) ;
121+ let val = try!( decode_bson ( reader, tag, true ) ) ;
122+
123+ doc. insert ( key, val) ;
124+ }
125+
126+ Ok ( doc)
127+ }
128+
129+ fn decode_array < R : Read + ?Sized > ( reader : & mut R , utf8_lossy : bool ) -> DecoderResult < Array > {
100130 let mut arr = Array :: new ( ) ;
101131
102132 // disregard the length: using Read::take causes infinite type recursion
@@ -119,20 +149,20 @@ fn decode_array<R: Read + ?Sized>(reader: &mut R) -> DecoderResult<Array> {
119149 }
120150 }
121151
122- let val = try!( decode_bson ( reader, tag) ) ;
152+ let val = try!( decode_bson ( reader, tag, utf8_lossy ) ) ;
123153 arr. push ( val)
124154 }
125155
126156 Ok ( arr)
127157}
128158
129- fn decode_bson < R : Read + ?Sized > ( reader : & mut R , tag : u8 ) -> DecoderResult < Bson > {
159+ fn decode_bson < R : Read + ?Sized > ( reader : & mut R , tag : u8 , utf8_lossy : bool ) -> DecoderResult < Bson > {
130160 use spec:: ElementType :: * ;
131161 match spec:: ElementType :: from ( tag) {
132162 Some ( FloatingPoint ) => Ok ( Bson :: FloatingPoint ( try!( reader. read_f64 :: < LittleEndian > ( ) ) ) ) ,
133- Some ( Utf8String ) => read_string ( reader) . map ( Bson :: String ) ,
163+ Some ( Utf8String ) => read_string ( reader, utf8_lossy ) . map ( Bson :: String ) ,
134164 Some ( EmbeddedDocument ) => decode_document ( reader) . map ( Bson :: Document ) ,
135- Some ( Array ) => decode_array ( reader) . map ( Bson :: Array ) ,
165+ Some ( Array ) => decode_array ( reader, utf8_lossy ) . map ( Bson :: Array ) ,
136166 Some ( Binary ) => {
137167 let len = try!( read_i32 ( reader) ) ;
138168 let subtype = BinarySubtype :: from ( try!( reader. read_u8 ( ) ) ) ;
@@ -154,13 +184,13 @@ fn decode_bson<R: Read + ?Sized>(reader: &mut R, tag: u8) -> DecoderResult<Bson>
154184 let opt = try!( read_cstring ( reader) ) ;
155185 Ok ( Bson :: RegExp ( pat, opt) )
156186 }
157- Some ( JavaScriptCode ) => read_string ( reader) . map ( Bson :: JavaScriptCode ) ,
187+ Some ( JavaScriptCode ) => read_string ( reader, utf8_lossy ) . map ( Bson :: JavaScriptCode ) ,
158188 Some ( JavaScriptCodeWithScope ) => {
159189 // disregard the length:
160190 // using Read::take causes infinite type recursion
161191 try!( read_i32 ( reader) ) ;
162192
163- let code = try!( read_string ( reader) ) ;
193+ let code = try!( read_string ( reader, utf8_lossy ) ) ;
164194 let scope = try!( decode_document ( reader) ) ;
165195 Ok ( Bson :: JavaScriptCodeWithScope ( code, scope) )
166196 }
@@ -171,7 +201,7 @@ fn decode_bson<R: Read + ?Sized>(reader: &mut R, tag: u8) -> DecoderResult<Bson>
171201 let time = try!( read_i64 ( reader) ) ;
172202 Ok ( Bson :: UtcDatetime ( Utc . timestamp ( time / 1000 , ( time % 1000 ) as u32 * 1000000 ) ) )
173203 }
174- Some ( Symbol ) => read_string ( reader) . map ( Bson :: Symbol ) ,
204+ Some ( Symbol ) => read_string ( reader, utf8_lossy ) . map ( Bson :: Symbol ) ,
175205 Some ( Undefined ) | Some ( DbPointer ) | Some ( MaxKey ) | Some ( MinKey ) | None => {
176206 Err ( DecoderError :: UnrecognizedElementType ( tag) )
177207 }
0 commit comments