Skip to content

Commit c7ac532

Browse files
committed
db-sync: Reject TxMetadata objects containing NUL characters
TxMetadata is stored as JSON, and that JSON is stored in a 'jsonb' column in PostgreSQL. However, the Postgres 'jsonb' data type has limitations. Specifically, it cannot contain Unicode NUL characters. This temporary fix simply drops TxMetadata JSON objects that would otherwise be rejected by Postgres. Hopefully a better solution will be dreamt up and implemented later. Temporary workaround fix for: #297
1 parent e8825e3 commit c7ac532

File tree

1 file changed

+36
-9
lines changed
  • cardano-db-sync/src/Cardano/DbSync/Era/Shelley

1 file changed

+36
-9
lines changed

cardano-db-sync/src/Cardano/DbSync/Era/Shelley/Insert.hs

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,13 @@
99

1010
module Cardano.DbSync.Era.Shelley.Insert
1111
( insertShelleyBlock
12+
, containsUnicodeNul
13+
, safeDecodeUtf8
1214
) where
1315

1416
import Cardano.Prelude
1517

16-
import Cardano.BM.Trace (Trace, logDebug, logError, logInfo)
18+
import Cardano.BM.Trace (Trace, logDebug, logError, logInfo, logWarning)
1719

1820
import Cardano.Db (DbWord64 (..))
1921

@@ -38,9 +40,12 @@ import Cardano.DbSync.Util
3840
import Cardano.Slotting.Slot (EpochNo (..), EpochSize (..))
3941

4042
import qualified Data.Aeson as Aeson
43+
import qualified Data.ByteString.Char8 as BS
4144
import qualified Data.ByteString.Lazy.Char8 as LBS
4245
import qualified Data.Map.Strict as Map
46+
import qualified Data.Text as Text
4347
import qualified Data.Text.Encoding as Text
48+
import qualified Data.Text.Encoding.Error as Text
4449

4550
import Database.Persist.Sql (SqlBackend)
4651

@@ -481,17 +486,39 @@ insertTxMetadata
481486
:: (MonadBaseControl IO m, MonadIO m)
482487
=> Trace IO Text -> DB.TxId -> Shelley.MetaData
483488
-> ExceptT DbSyncNodeError (ReaderT SqlBackend m) ()
484-
insertTxMetadata _tracer txId (Shelley.MetaData mdmap) =
489+
insertTxMetadata tracer txId (Shelley.MetaData mdmap) =
485490
mapM_ insert $ Map.toList mdmap
486491
where
487492
insert
488493
:: (MonadBaseControl IO m, MonadIO m)
489494
=> (Word64, Shelley.MetaDatum)
490495
-> ExceptT DbSyncNodeError (ReaderT SqlBackend m) ()
491-
insert (key, md) =
492-
void . lift . DB.insertTxMetadata $
493-
DB.TxMetadata
494-
{ DB.txMetadataKey = DbWord64 key
495-
, DB.txMetadataJson = Text.decodeUtf8 . LBS.toStrict $ Aeson.encode (jsonFromMetadataValue md)
496-
, DB.txMetadataTxId = txId
497-
}
496+
insert (key, md) = do
497+
let jsonbs = LBS.toStrict $ Aeson.encode (jsonFromMetadataValue md)
498+
ejson <- liftIO $ safeDecodeUtf8 jsonbs
499+
case ejson of
500+
Left err ->
501+
liftIO . logWarning tracer $ mconcat
502+
[ "insertTxMetadata: Could not decode to UTF8: ", textShow err ]
503+
Right json -> do
504+
-- See https://github.com/input-output-hk/cardano-db-sync/issues/297
505+
if containsUnicodeNul json
506+
then liftIO $ logWarning tracer "insertTxMetadata: dropped due to a Unicode NUL character."
507+
else
508+
void . lift . DB.insertTxMetadata $
509+
DB.TxMetadata
510+
{ DB.txMetadataKey = DbWord64 key
511+
, DB.txMetadataJson = json
512+
, DB.txMetadataTxId = txId
513+
}
514+
515+
-- | Strictly decode a UTF-8 'ByteString' into 'Text' without ever throwing.
--
-- Input that contains a NUL byte is refused up front: the result is a
-- 'Text.DecodeError' whose description is the whole input rendered as a
-- 'String' and whose offending byte is reported as @0@. Any other input is
-- decoded strictly, so malformed UTF-8 is returned as a 'Left' instead of
-- escaping as an exception.
safeDecodeUtf8 :: ByteString -> IO (Either Text.UnicodeException Text)
safeDecodeUtf8 bs =
  pure $
    if BS.elem '\NUL' bs
      then Left $ Text.DecodeError (BS.unpack bs) (Just 0)
      -- 'Text.decodeUtf8'' is the strict decoder with the exception already
      -- captured as an 'Either'.
      else Text.decodeUtf8' bs
522+
523+
-- | Does the given JSON-encoded text contain an escaped Unicode NUL, i.e. the
-- JSON escape sequence @\\u0000@?
--
-- PostgreSQL's @jsonb@ type rejects exactly that escape, so only it is
-- reported. The text is scanned one JSON escape at a time rather than with a
-- plain substring search, for two reasons:
--
-- * other control-character escapes such as @\\u0001@ or @\\u000f@ share the
--   @\\u000@ prefix but are not NUL;
--
-- * an escaped backslash followed by the literal characters @u0000@ is an
--   ordinary six-character string, not a NUL.
--
-- NOTE: this assumes the input is well-formed JSON, where a backslash only
-- ever appears as the start of an escape sequence inside a string.
containsUnicodeNul :: Text -> Bool
containsUnicodeNul = go . dropToEscape
  where
    -- Skip ahead to the next backslash (the start of a JSON escape), if any.
    dropToEscape :: Text -> Text
    dropToEscape = snd . Text.breakOn "\\"

    go :: Text -> Bool
    go rest
      | Text.null rest = False
      | "\\u0000" `Text.isPrefixOf` rest = True
      -- Not a NUL escape: skip the backslash and the character that selects
      -- the escape kind, so the second half of an escaped backslash is never
      -- mistaken for the start of a new escape.
      | otherwise = go (dropToEscape (Text.drop 2 rest))

0 commit comments

Comments
 (0)