diff --git a/README.md b/README.md
index c57db55..10fea10 100644
--- a/README.md
+++ b/README.md
@@ -74,6 +74,7 @@ The configuration file uses following kind of syntax:
 
 ```YAML
 config:
+  charset: iso-8859-1 # defaults to utf-8 if missing, only affects mysqldump
   addons:
     - some.other.package
     - yet.another.package
diff --git a/database_sanitizer/config.py b/database_sanitizer/config.py
index d5f1c6e..77363ed 100644
--- a/database_sanitizer/config.py
+++ b/database_sanitizer/config.py
@@ -12,6 +12,7 @@ SKIP_ROWS_CONFIG_VALUE = "skip_rows"
 
 MYSQLDUMP_DEFAULT_PARAMETERS = ["--single-transaction"]
 PG_DUMP_DEFAULT_PARAMETERS = []
+CHARSET_DEFAULT = "utf-8"
 
 
 class ConfigurationError(ValueError):
@@ -31,6 +32,7 @@ def __init__(self):
         self.addon_packages = []
         self.mysqldump_params = []
         self.pg_dump_params = []
+        self.charset = CHARSET_DEFAULT
 
     @classmethod
     def from_file(cls, filename):
@@ -73,6 +75,17 @@ def load(self, config_data):
         self.load_sanitizers(config_data)
         self.load_dump_extra_parameters(config_data)
 
+        # Character set used to decode the mysqldump output stream.
+        charset = config_data.get("config", {}).get("charset", CHARSET_DEFAULT)
+        if not isinstance(charset, str):
+            raise ConfigurationError(
+                "'charset' is %s instead of str" % (
+                    type(charset),
+                ),
+            )
+
+        self.charset = charset
+
     def load_dump_extra_parameters(self, config_data):
         """
         Loads extra parameters for mysqldump and/or pg_dump CLI usage. These
diff --git a/database_sanitizer/dump/mysql.py b/database_sanitizer/dump/mysql.py
index 8a8f63c..4f5b551 100644
--- a/database_sanitizer/dump/mysql.py
+++ b/database_sanitizer/dump/mysql.py
@@ -84,7 +84,9 @@ def sanitize_from_stream(stream, config):
                    of the values stored in the database.
     :type config: database_sanitizer.config.Configuration|None
     """
-    for line in io.TextIOWrapper(stream, encoding="utf-8"):
+    # config may be None (see docstring); keep the previous UTF-8 behaviour then.
+    charset = config.charset if config is not None else "utf-8"
+    for line in io.TextIOWrapper(stream, encoding=charset):
         # Eat the trailing new line.
         line = line.rstrip("\n")
 