Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 136 additions & 2 deletions spark-md5.js
Original file line number Diff line number Diff line change
Expand Up @@ -377,10 +377,144 @@
/**
* Helpers.
*/
// Character codes of "0" and "f", used by the hex-digit helpers below.
var zeroCharCode = "0".charCodeAt(0),
    letterFCharCode = "f".charCodeAt(0);

/**
 * Converts a single hexadecimal digit character to its numeric value.
 *
 * Both range checks carry an explicit lower bound: JavaScript numbers are
 * signed, so without it any character below "0" (such as "%") or between
 * "9" and "A" (such as ":") would pass the `<=` comparison.
 *
 * @param {string} c - String whose first character is examined.
 * @returns {number} 0-15 for [0-9a-fA-F], otherwise -1.
 */
function clampHexValue(c) {
    var offset = c.charCodeAt(0) - zeroCharCode;
    if (offset >= 0 && offset <= 9) {
        return offset;
    }
    // Setting bit 0x20 folds "A"-"F" onto "a"-"f" before rebasing on "a".
    var letterIndex = (offset | 0x20) - ("a".charCodeAt(0) - zeroCharCode);
    if (letterIndex >= 0 && letterIndex <= 5) {
        return letterIndex + 10;
    }
    return -1;
}

/**
 * Combines two hexadecimal digit characters into a single byte value.
 *
 * @param {string} char1 - High-nibble digit character.
 * @param {string} char2 - Low-nibble digit character.
 * @returns {number} 0-255 when both characters are hex digits, otherwise -1
 *   (also when either argument is missing or empty).
 */
function twoDigitHex(char1, char2) {
    if (!char1 || !char2) {
        return -1;
    }

    // Cheap pre-filter: nothing above "f" can be a hex digit.
    if (char1.charCodeAt(0) > letterFCharCode || char2.charCodeAt(0) > letterFCharCode) {
        return -1;
    }

    var high = clampHexValue(char1);
    // Reject every negative result, not only the exact sentinel -1, so an
    // invalid nibble can never be folded into a non-negative byte value.
    if (high < 0) {
        return -1;
    }

    var low = clampHexValue(char2);
    if (low < 0) {
        return -1;
    }

    return (high << 4) + low;
}
// JavaScript port of V8's per-character unescape step:
// https://github.com/v8/v8/blob/main/src/strings/uri.cc#L329
/**
 * Decodes the escape sequence, if any, that starts at arr[i].
 *
 * @param {string[]} arr - The input split into single characters.
 * @param {number} i - Index of the character to inspect.
 * @param {number} charLength - Number of characters considered part of the input.
 * @returns {{value: (number|string), step: number}} For "%uXXXX" the 16-bit
 *   code and step 6; for "%XX" the byte value and step 3; otherwise the
 *   character itself (a string) and step 1.
 */
function unescapeChar(arr, i, charLength) {
    var current = arr[i];
    var literal = { value: current, step: 1 };

    if (current !== "%") {
        return literal;
    }

    // Long form "%uXXXX": six characters and two valid hex pairs are required.
    if (arr[i + 1] === "u" && i + 6 <= charLength) {
        var upperByte = twoDigitHex(arr[i + 2], arr[i + 3]);
        var lowerByte = twoDigitHex(arr[i + 4], arr[i + 5]);
        if (upperByte > -1 && lowerByte > -1) {
            return { value: (upperByte << 8) + lowerByte, step: 6 };
        }
    }

    // Short form "%XX": three characters and one valid hex pair are required.
    if (i + 3 <= charLength) {
        var byteValue = twoDigitHex(arr[i + 1], arr[i + 2]);
        if (byteValue > -1) {
            return { value: byteValue, step: 3 };
        }
    }

    return literal;
}

/**
 * Locates the first "%" in the input, i.e. the first position at which an
 * escape sequence could start.
 *
 * @param {string} str - Text to scan.
 * @returns {number} Index of the first "%", or -1 when there is none.
 */
function findEncodingIndex(str) {
    // indexOf stops at the first match instead of walking the whole string.
    return str.indexOf("%");
}

/**
 * Decodes "%XX" and "%uXXXX" escape sequences in a string, one character
 * code per sequence; every other character is copied through unchanged.
 *
 * @param {*} str - Text to decode.
 * @returns {string} The decoded text; "" when str is not a string.
 */
function legacyUnescape(str) {
    if (typeof str !== "string") {
        return "";
    }

    var encodingIndex = findEncodingIndex(str);
    if (encodingIndex < 0) {
        // Nothing to decode.
        return str;
    }

    var len = str.length;
    var arr = str.split("");
    // Everything before the first "%" is copied through untouched.
    var unescapedStr = str.substring(0, encodingIndex);
    var i = encodingIndex;
    var decoded;

    while (i < len) {
        decoded = unescapeChar(arr, i, len);
        // Only numeric values are character codes. Literal characters,
        // including the digits "0"-"9", must be appended verbatim.
        if (typeof decoded.value === "number") {
            unescapedStr += String.fromCharCode(decoded.value);
        } else {
            unescapedStr += decoded.value;
        }
        i += decoded.step;
    }
    return unescapedStr;
}

function toUtf8(str) {
if (/[\u0080-\uFFFF]/.test(str)) {
str = unescape(encodeURIComponent(str));
var containsUnicodeChars = /[\u0080-\uFFFF]/.test(str)
if (containsUnicodeChars) {
str = legacyUnescape(encodeURIComponent(str));
}

return str;
Expand Down
156 changes: 152 additions & 4 deletions test/specs.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,142 @@
// Hasher instances shared by the specs in this file.
var hasher = new SparkMD5();
var buffHasher = new SparkMD5.ArrayBuffer();

// Character codes of "0" and "f", used by the hex-digit helpers below.
const [zeroCharCode, letterFCharCode] = ["0", "f"].map((ch) => ch.charCodeAt(0));

/**
 * Converts a single hexadecimal digit character to its numeric value.
 *
 * The lower bounds are checked explicitly because JavaScript numbers are
 * signed: without them characters below "0" (such as "%") or between "9"
 * and "A" (such as ":") would satisfy the `<=` comparisons.
 *
 * @param {string} c - String whose first character is examined.
 * @returns {number} 0-15 for [0-9a-fA-F], otherwise -1.
 */
const hexValue = (c) => {
    const digitOffset = c.charCodeAt(0) - zeroCharCode;
    if (digitOffset >= 0 && digitOffset <= 9) {
        return digitOffset;
    }
    // Setting bit 0x20 folds "A"-"F" onto "a"-"f" before rebasing on "a".
    const letterOffset = (digitOffset | 0x20) - ("a".charCodeAt(0) - zeroCharCode);
    if (letterOffset >= 0 && letterOffset <= 5) {
        return letterOffset + 10;
    }
    return -1;
};

/**
 * Combines two hexadecimal digit characters into a single byte value.
 *
 * @param {string} char1 - High-nibble digit character.
 * @param {string} char2 - Low-nibble digit character.
 * @returns {number} 0-255 when both characters are hex digits, otherwise -1
 *   (also when either argument is missing or empty).
 */
const twoDigitHex = (char1, char2) => {
    if (!char1 || !char2) {
        return -1;
    }

    // Cheap pre-filter: nothing above "f" can be a hex digit.
    const aboveF = (ch) => ch.charCodeAt(0) > letterFCharCode;
    if (aboveF(char1) || aboveF(char2)) {
        return -1;
    }

    const high = hexValue(char1);
    const low = hexValue(char2);
    // Reject every negative nibble, not only the exact sentinel -1, so an
    // invalid digit can never be folded into a non-negative byte value.
    if (high < 0 || low < 0) {
        return -1;
    }

    return (high << 4) + low;
};

// JavaScript port of V8's per-character unescape step:
// https://github.com/v8/v8/blob/main/src/strings/uri.cc#L329

/**
 * Decodes the escape sequence, if any, that starts at arr[i].
 *
 * @param {string[]} arr - The input split into single characters.
 * @param {number} i - Index of the character to inspect.
 * @param {number} charLength - Number of characters considered part of the input.
 * @returns {{value: (number|string), step: number}} For "%uXXXX" the 16-bit
 *   code and step 6; for "%XX" the byte value and step 3; otherwise the
 *   character itself (a string) and step 1.
 */
const unescapeChar = (arr, i, charLength) => {
    const character = arr[i];
    const asLiteral = () => ({ value: character, step: 1 });

    if (character !== "%") {
        return asLiteral();
    }

    const remaining = charLength - i;

    // Long form "%uXXXX".
    if (remaining >= 6 && arr[i + 1] === "u") {
        const hi = twoDigitHex(arr[i + 2], arr[i + 3]);
        const lo = twoDigitHex(arr[i + 4], arr[i + 5]);
        if (hi > -1 && lo > -1) {
            return { value: (hi << 8) + lo, step: 6 };
        }
    }

    // Short form "%XX".
    if (remaining >= 3) {
        const byteValue = twoDigitHex(arr[i + 1], arr[i + 2]);
        if (byteValue > -1) {
            return { value: byteValue, step: 3 };
        }
    }

    return asLiteral();
};

/**
 * Locates the first "%" in the input, i.e. the first position at which an
 * escape sequence could start.
 *
 * @param {string} str - Text to scan.
 * @returns {number} Index of the first "%", or -1 when there is none.
 */
// indexOf stops at the first match instead of walking the whole string.
const findEncodingIndex = (str) => str.indexOf("%");

/**
 * Decodes "%XX" and "%uXXXX" escape sequences in a string, one character
 * code per sequence; every other character is copied through unchanged.
 *
 * @param {*} str - Text to decode.
 * @returns {string} The decoded text; "" when str is not a string.
 */
const legacyUnescape = (str) => {
    if (typeof str !== "string") {
        return "";
    }

    const encodingIndex = findEncodingIndex(str);
    if (encodingIndex < 0) {
        // Nothing to decode.
        return str;
    }

    const len = str.length;
    const arr = str.split("");
    // Everything before the first "%" is copied through untouched.
    let unescapedStr = str.substring(0, encodingIndex);

    let i = encodingIndex;
    while (i < len) {
        const { value, step } = unescapeChar(arr, i, len);
        // Only numeric values are character codes. Literal characters,
        // including the digits "0"-"9", must be appended verbatim.
        unescapedStr += typeof value === "number" ? String.fromCharCode(value) : value;
        i += step;
    }
    return unescapedStr;
};

function unicodeStringToArrayBuffer(str) {
if (/[\u0080-\uFFFF]/.test(str)) {
str = unescape(encodeURIComponent(str));
Expand All @@ -13,9 +149,9 @@ function unicodeStringToArrayBuffer(str) {

function stringToArrayBuffer(str) {
var length = str.length,
buff = new ArrayBuffer(length),
arr = new Uint8Array(buff),
i;
buff = new ArrayBuffer(length),
arr = new Uint8Array(buff),
i;

for (i = 0; i < length; i += 1) {
arr[i] = str.charCodeAt(i);
Expand Down Expand Up @@ -359,6 +495,18 @@ test('UTF-8', function () {
equal(buffHasher.end(), '453931ab48a4a5af69f3da3c21064fc9', 'Incremental (array buffer) of "' + str + '"');
});


test('legacyUnescape', function () {
    // 'r%C3%A4ksm%C3%B6rg%C3%A5s' is the percent-encoded UTF-8 form of 'räksmörgås'.
    // Each %XX escape decodes to exactly one character code, so the result is
    // the UTF-8 byte string (two characters per umlaut), not the original text.
    equal(legacyUnescape('r%C3%A4ksm%C3%B6rg%C3%A5s'), 'r\u00C3\u00A4ksm\u00C3\u00B6rg\u00C3\u00A5s', 'legacyUnescape() of "r%C3%A4ksm%C3%B6rg%C3%A5s"');
    equal(legacyUnescape('%E4%F6%FC'), 'äöü', 'legacyUnescape() of "%E4%F6%FC"');
    equal(legacyUnescape('%u0107'), 'ć', 'legacyUnescape() of "%u0107"');
    equal(legacyUnescape('%E4'), 'ä', 'legacyUnescape() of "%E4"');
    equal(legacyUnescape('abc123'), 'abc123', 'legacyUnescape() found no escape sequences for "abc123"');
    equal(legacyUnescape(''), '', 'legacyUnescape() should find no escape sequences and should return empty "" ');
    equal(legacyUnescape('スを食'), 'スを食', 'legacyUnescape() should find no escape sequences and should return "スを食" ');
});

test('Hashing a PNG - ArrayBuffer vs binary string', function () {
var binString,
buffer;
Expand All @@ -374,4 +522,4 @@ test('Hashing a PNG - ArrayBuffer vs binary string', function () {
hasher.appendBinary(binString);

equal(buffHasher.end(), hasher.end(), 'md5 sum should be the same for both binary strings and ArrayBuffers');
});
});