Skip to content

Commit 7cb5de0

Browse files
committed
Generalize charset converting
1 parent 3fbee96 commit 7cb5de0

File tree

3 files changed

+58
-22
lines changed

3 files changed

+58
-22
lines changed

src/Fetch/MIME.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public static function decode($text, $targetCharset = 'utf-8')
3737
foreach (imap_mime_header_decode($text) as $word) {
3838
$ch = 'default' === $word->charset ? 'ascii' : $word->charset;
3939

40-
$result .= iconv($ch, $targetCharset, $word->text);
40+
$result .= Message::charsetConvert($word->text, $ch, $targetCharset) ?: $text;
4141
}
4242

4343
return $result;

src/Fetch/Message.php

+42-21
Original file line numberDiff line numberDiff line change
@@ -520,27 +520,8 @@ protected function processStructure($structure, $partIdentifier = null)
520520

521521
$messageBody = self::decode($messageBody, $structure->encoding);
522522

523-
if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) {
524-
$mb_converted = false;
525-
if (function_exists('mb_convert_encoding')) {
526-
if (!in_array($parameters['charset'], mb_list_encodings())) {
527-
if ($structure->encoding === 0) {
528-
$parameters['charset'] = 'US-ASCII';
529-
} else {
530-
$parameters['charset'] = 'UTF-8';
531-
}
532-
}
533-
534-
$messageBody = @mb_convert_encoding($messageBody, self::$charset, $parameters['charset']);
535-
$mb_converted = true;
536-
}
537-
if (!$mb_converted) {
538-
$messageBodyConv = @iconv($parameters['charset'], self::$charset . self::$charsetFlag, $messageBody);
539-
540-
if ($messageBodyConv !== false) {
541-
$messageBody = $messageBodyConv;
542-
}
543-
}
523+
if (!empty($parameters['charset'])) {
524+
$messageBody = self::charsetConvert($messageBody, $parameters['charset'], self::$charset) ?: $messageBody;
544525
}
545526

546527
if (strtolower($structure->subtype) === 'plain' || ($structure->type == 1 && strtolower($structure->subtype) !== 'alternative')) {
@@ -575,6 +556,46 @@ protected function processStructure($structure, $partIdentifier = null)
575556
}
576557
}
577558

559+
/**
 * Converts text from one character set to another.
 *
 * mbstring is tried first (only when the text validates against the source
 * charset); iconv is used as a fallback when mbstring is unavailable, rejects
 * the charset, or produces no usable result.
 *
 * @param string      $text Text to convert.
 * @param string      $from Name of the charset the text is currently in.
 * @param string|null $to   Name of the target charset; defaults to self::$charset.
 *
 * @return string|null Empty string for empty input, the text unchanged when
 *                     both charsets are equal (case-insensitively), the
 *                     converted text on success, or null when no converter
 *                     could produce a non-empty result.
 */
public static function charsetConvert($text, $from, $to = null)
{
    // The string "0" is falsy in PHP but is real content, so it must not be
    // mistaken for empty input.
    if (!$text && '0' !== $text) {
        return '';
    }

    if (null === $to) {
        $to = self::$charset;
    }

    $from = strtolower($from);
    $to = strtolower($to);

    if ($from === $to) {
        return $text;
    }

    $converted = null;

    if (function_exists('mb_convert_encoding')) {
        try {
            if (@mb_check_encoding($text, $from)) {
                $converted = @mb_convert_encoding($text, $to, $from);
            }
        } catch (\ValueError $e) {
            // PHP 8+ throws for unknown encoding names instead of emitting a
            // warning; "@" cannot silence that, so fall through to iconv.
            $converted = null;
        }
    }

    // Compare against '' explicitly: a converted "0" is a valid result.
    if ((!is_string($converted) || '' === $converted) && function_exists('iconv')) {
        // self::$charsetFlag is appended to the target charset name
        // (presumably an iconv suffix such as //TRANSLIT or //IGNORE - confirm
        // against the property declaration).
        $converted = @iconv($from, $to . self::$charsetFlag, $text);
    }

    if (is_string($converted) && '' !== $converted) {
        return $converted;
    }

    return null;
}
598+
578599
/**
579600
* This function takes in the message data and encoding type and returns the decoded data.
580601
*

tests/Fetch/Test/MessageTest.php

+15
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,21 @@ public function testMoveToMailbox()
240240
$this->assertEquals($sentFolderNumStart + 1, $server->numMessages(), 'Message moved into Sent Folder.');
241241
}
242242

243+
/**
 * Covers Message::charsetConvert(): a real KOI8-R to UTF-8 conversion, the
 * short-circuits for identical charsets and empty text, and the null result
 * when either charset name is unknown.
 */
public function testCharsetConvert()
{
    // The word "Привет" encoded as raw KOI8-R bytes.
    $koi8Bytes = "\xF0\xD2\xC9\xD7\xC5\xD4";
    $fromKoi8 = Message::charsetConvert($koi8Bytes, 'koi8-r', 'utf-8');
    $this->assertSame('Привет', $fromKoi8);

    // No conversion is attempted when nothing needs converting.
    $sameCharsets = Message::charsetConvert('test', 'unk1', 'unk1');
    $this->assertSame('test', $sameCharsets, 'Same charsets not try converting');

    $emptyText = Message::charsetConvert('', 'unk1', 'unk1');
    $this->assertSame('', $emptyText, 'Empty text not try converting');

    // Unknown charset names on either side yield null.
    $unknownSource = Message::charsetConvert('test', 'unk1', 'utf-8');
    $this->assertSame(null, $unknownSource, 'Null when source charset is unknown');

    $unknownTarget = Message::charsetConvert('test', 'utf-8', 'unk1');
    $this->assertSame(null, $unknownTarget, 'Null when destination charset is unknown');
}
257+
243258
public function testDecode()
244259
{
245260
$quotedPrintableDecoded = "Now's the time for all folk to come to the aid of their country.";

0 commit comments

Comments
 (0)