@@ -122,7 +122,7 @@ def self.rtf2text str, format=:text
122122 end
123123 end
124124
125- RTF_PREBUF =
125+ RTF_PREBUF =
126126 "{\\ rtf1\\ ansi\\ mac\\ deff0\\ deftab720{\\ fonttbl;}" \
127127 "{\\ f0\\ fnil \\ froman \\ fswiss \\ fmodern \\ fscript " \
128128 "\\ fdecor MS Sans SerifSymbolArialTimes New RomanCourier" \
@@ -175,7 +175,7 @@ def rtfdecompr data
175175 else # unknown magic number
176176 raise "Unknown compression type (magic number 0x%08x)" % magic
177177 end
178-
178+
179179 # not sure if it's due to a bug in the above code. doesn't seem to be
180180 # in my tests, but sometimes there's a trailing null. we chomp it here,
181181 # which actually makes the resultant rtf smaller than its advertised
@@ -189,7 +189,7 @@ def rtfdecompr data
189189 #
190190 # Returns +nil+ if it doesn't look like rtf encapsulated html.
191191 #
192- # Some cases that the original didn't deal with have been patched up, eg from
192+ # Some cases that the original didn't deal with have been patched up, eg from
193193 # this chunk, where there are tags outside of the htmlrtf ignore block.
194194 #
195195 # "{\\*\\htmltag116 <br />}\\htmlrtf \\line \\htmlrtf0 \\line {\\*\\htmltag84 <a href..."
@@ -229,8 +229,14 @@ def rtfdecompr data
229229 def rtf2html rtf
230230 scan = StringScanner . new rtf
231231 # require \fromhtml. is this worth keeping? apparently you see \\fromtext if it
232- # was converted from plain text.
232+ # was converted from plain text.
233233 return nil unless rtf [ "\\ fromhtml" ]
234+ if scan . scan_until ( /\\ ansicpg/ )
235+ code_page = "cp" + scan . scan ( /\d +/ )
236+ scan . pos = 0
237+ else
238+ code_page = 'ascii'
239+ end
234240 html = ''
235241 ignore_tag = nil
236242 # skip up to the first htmltag. return nil if we don't ever find one
@@ -270,7 +276,7 @@ def rtf2html rtf
270276 p :wtf
271277 end
272278 end
273- html . strip . empty? ? nil : html
279+ html . strip . empty? ? nil : html . encode ( 'utf-8' , code_page )
274280 end
275281
276282 module_function :rtf2html , :rtfdecompr
0 commit comments