diff --git a/script/.gitignore b/script/.gitignore index 1fb9ef57453b6..96109316efcb9 100644 --- a/script/.gitignore +++ b/script/.gitignore @@ -1 +1,2 @@ tmp/* +settings.local.yml diff --git a/script/import_scripts/mbox/importer.rb b/script/import_scripts/mbox/importer.rb index f091827a9d5d0..028d7c00ca4c7 100644 --- a/script/import_scripts/mbox/importer.rb +++ b/script/import_scripts/mbox/importer.rb @@ -13,10 +13,11 @@ def initialize(settings) @database = Database.new(@settings.data_dir, @settings.batch_size) end - def change_site_settings - super - - SiteSetting.enable_staged_users = true + def get_site_settings_for_import + settings = super + settings[:enable_staged_users] = true + settings[:incoming_email_prefer_html] = @settings.prefer_html + settings end protected @@ -120,7 +121,7 @@ def format_raw(email_body, attachment_html, elided, format) when Email::Receiver::formats[:markdown] body = email_body body << attachment_html if attachment_html.present? - body << elided if elided.present? + body << Email::Receiver.elided_html(elided) if elided.present? when Email::Receiver::formats[:plaintext] body = %|[plaintext]\n#{escape_tags(email_body)}\n[/plaintext]| body << %|\n[attachments]\n#{escape_tags(attachment_html)}\n[/attachments]| if attachment_html.present? diff --git a/script/import_scripts/mbox/settings.yml b/script/import_scripts/mbox/settings.yml index 0a193aa1ed0b9..a8db4453fd719 100644 --- a/script/import_scripts/mbox/settings.yml +++ b/script/import_scripts/mbox/settings.yml @@ -1,9 +1,10 @@ # PostgreSQL mailing lists -#data_dir: /data/import/postgres +#data_dir: /shared/import/data #split_regex: "^From .*@postgresql.org.*" # ruby-talk mailing list -data_dir: /data/import/ruby-talk/news/gmane/comp/lang/ruby +data_dir: /shared/import/data split_regex: "" default_trust_level: 1 +prefer_html: false diff --git a/script/import_scripts/mbox/support/indexer.rb b/script/import_scripts/mbox/support/indexer.rb index 0ae963e7fbd40..b7972f1ce7737 100644 --- a/script/import_scripts/mbox/support/indexer.rb +++ b/script/import_scripts/mbox/support/indexer.rb @@ -102,10 +102,12 @@ def all_messages(directory, category_name) if @split_regex.present? each_mail(filename) do |raw_message, first_line_number, last_line_number| - yield read_mail_from_string(raw_message), filename, first_line_number, last_line_number + receiver = read_mail_from_string(raw_message) + yield receiver, filename, first_line_number, last_line_number if receiver.present? end else - yield read_mail_from_file(filename), filename + receiver = read_mail_from_file(filename) + yield receiver, filename if receiver.present? end mark_as_fully_indexed(category_name, filename) @@ -161,7 +163,7 @@ def read_mail_from_file(filename) end def read_mail_from_string(raw_message) - Email::Receiver.new(raw_message) + Email::Receiver.new(raw_message) unless raw_message.blank? end def extract_reply_message_ids(mail) @@ -208,7 +210,12 @@ def clean_subject(subject) end def ignored_file?(filename, checksums) - File.directory?(filename) || metadata_file?(filename) || fully_indexed?(filename, checksums) + File.directory?(filename) || hidden_file?(filename) || + metadata_file?(filename) || fully_indexed?(filename, checksums) + end + + def hidden_file?(filename) + File.basename(filename).start_with?('.') end def metadata_file?(filename) diff --git a/script/import_scripts/mbox/support/settings.rb b/script/import_scripts/mbox/support/settings.rb index 79fb5ab56b666..8ec5456a230ca 100644 --- a/script/import_scripts/mbox/support/settings.rb +++ b/script/import_scripts/mbox/support/settings.rb @@ -11,12 +11,14 @@ def self.load(filename) attr_reader :split_regex attr_reader :batch_size attr_reader :trust_level + attr_reader :prefer_html def initialize(yaml) @data_dir = yaml['data_dir'] @split_regex = Regexp.new(yaml['split_regex']) unless yaml['split_regex'].empty? @batch_size = 1000 # no need to make this actually configurable at the moment @trust_level = yaml['default_trust_level'] + @prefer_html = yaml['prefer_html'] end end end