Make ArbitraryBase Unicode-aware (#1299)

* Make ArbitraryBase Unicode-aware

https://mathiasbynens.be/notes/javascript-unicode#counting-symbols

* Fix performance bug and add Unicode test

* Add BigInt version and push output chars to array
This commit is contained in:
lionel-rowe
2023-02-23 22:34:47 +08:00
committed by GitHub
parent 6aa3314b93
commit 0c427580f1
2 changed files with 89 additions and 20 deletions

View File

@ -1,20 +1,36 @@
/** /**
* Converts a string from one base to other * Divide two numbers and get the result of floor division and remainder
* @param {number} dividend
* @param {number} divisor
* @returns {[result: number, remainder: number]}
*/
const floorDiv = (dividend, divisor) => {
  // Quotient rounded toward negative infinity.
  const result = Math.floor(dividend / divisor)
  // `%` truncates toward zero, so for operands of opposite sign its result
  // does not pair with the floored quotient. Shift it by one divisor in that
  // case so that `result * divisor + remainder === dividend` holds.
  // Non-negative operands take the unmodified `%` path.
  let remainder = dividend % divisor
  if (remainder !== 0 && (remainder < 0) !== (divisor < 0)) {
    remainder += divisor
  }
  return [result, remainder]
}
/**
* Converts a string from one base to other. Loses accuracy above the value of `Number.MAX_SAFE_INTEGER`.
* @param {string} stringInBaseOne String in input base * @param {string} stringInBaseOne String in input base
* @param {string} baseOneCharacters Character set for the input base * @param {string} baseOneCharacters Character set for the input base
* @param {string} baseTwoCharacters Character set for the output base * @param {string} baseTwoCharacters Character set for the output base
* @returns {string} * @returns {string}
*/ */
const convertArbitraryBase = (stringInBaseOne, baseOneCharacters, baseTwoCharacters) => { const convertArbitraryBase = (stringInBaseOne, baseOneCharacterString, baseTwoCharacterString) => {
if ([stringInBaseOne, baseOneCharacters, baseTwoCharacters].map(arg => typeof arg).some(type => type !== 'string')) { if ([stringInBaseOne, baseOneCharacterString, baseTwoCharacterString].map(arg => typeof arg).some(type => type !== 'string')) {
throw new TypeError('Only string arguments are allowed') throw new TypeError('Only string arguments are allowed')
} }
[baseOneCharacters, baseTwoCharacters].forEach(baseString => {
const charactersInBase = [...baseString] const baseOneCharacters = [...baseOneCharacterString]
const baseTwoCharacters = [...baseTwoCharacterString]
for (const charactersInBase of [baseOneCharacters, baseTwoCharacters]) {
if (charactersInBase.length !== new Set(charactersInBase).size) { if (charactersInBase.length !== new Set(charactersInBase).size) {
throw new TypeError('Duplicate characters in character set are not allowed') throw new TypeError('Duplicate characters in character set are not allowed')
} }
}) }
const reversedStringOneChars = [...stringInBaseOne].reverse() const reversedStringOneChars = [...stringInBaseOne].reverse()
const stringOneBase = baseOneCharacters.length const stringOneBase = baseOneCharacters.length
let value = 0 let value = 0
@ -27,24 +43,57 @@ const convertArbitraryBase = (stringInBaseOne, baseOneCharacters, baseTwoCharact
value += (digitNumber * placeValue) value += (digitNumber * placeValue)
placeValue *= stringOneBase placeValue *= stringOneBase
} }
let stringInBaseTwo = '' const outputChars = []
const stringTwoBase = baseTwoCharacters.length const stringTwoBase = baseTwoCharacters.length
while (value > 0) { while (value > 0) {
const remainder = value % stringTwoBase const [divisionResult, remainder] = floorDiv(value, stringTwoBase)
stringInBaseTwo = baseTwoCharacters.charAt(remainder) + stringInBaseTwo outputChars.push(baseTwoCharacters[remainder])
value /= stringTwoBase value = divisionResult
} }
const baseTwoZero = baseTwoCharacters.charAt(0) return outputChars.reverse().join('') || baseTwoCharacters[0]
return stringInBaseTwo.replace(new RegExp(`^${baseTwoZero}+`), '')
} }
export { convertArbitraryBase } /**
* Converts an arbitrary-length string from one base to another. Doesn't lose accuracy.
* @param {string} stringInBaseOne String in input base
* @param {string} baseOneCharacterString Character set for the input base
* @param {string} baseTwoCharacterString Character set for the output base
* @returns {string}
*/
const convertArbitraryBaseBigIntVersion = (stringInBaseOne, baseOneCharacterString, baseTwoCharacterString) => {
  if ([stringInBaseOne, baseOneCharacterString, baseTwoCharacterString].some(arg => typeof arg !== 'string')) {
    throw new TypeError('Only string arguments are allowed')
  }

  // Spreading a string iterates by code point, so astral symbols such as
  // emoji count as a single digit instead of two UTF-16 code units.
  const baseOneCharacters = [...baseOneCharacterString]
  const baseTwoCharacters = [...baseTwoCharacterString]
  for (const charactersInBase of [baseOneCharacters, baseTwoCharacters]) {
    if (charactersInBase.length !== new Set(charactersInBase).size) {
      throw new TypeError('Duplicate characters in character set are not allowed')
    }
  }

  // With a single output character the division loop below never shrinks the
  // value (x / 1n === x) and would run forever; with none it divides by zero.
  if (baseTwoCharacters.length < 2) {
    throw new TypeError('Output character set must contain at least two characters')
  }

  // Precompute digit values once so each input character is an O(1) lookup.
  const digitValues = new Map(baseOneCharacters.map((character, index) => [character, BigInt(index)]))

  // Accumulate from the least significant digit upwards.
  const reversedStringOneChars = [...stringInBaseOne].reverse()
  const stringOneBase = BigInt(baseOneCharacters.length)
  let value = 0n
  let placeValue = 1n
  for (const digit of reversedStringOneChars) {
    const digitNumber = digitValues.get(digit)
    if (digitNumber === undefined) {
      throw new TypeError(`Not a valid character: ${digit}`)
    }
    value += (digitNumber * placeValue)
    placeValue *= stringOneBase
  }

  // Repeated division yields output digits least significant first; they are
  // pushed to an array and reversed once at the end.
  const outputChars = []
  const stringTwoBase = BigInt(baseTwoCharacters.length)
  while (value > 0n) {
    const remainder = value % stringTwoBase
    outputChars.push(baseTwoCharacters[Number(remainder)])
    value = value / stringTwoBase
  }

  // A value of zero produces no digits, so fall back to the output zero digit.
  return outputChars.reverse().join('') || baseTwoCharacters[0]
}
// > convertArbitraryBase('129', '0123456789', '01234567') export { convertArbitraryBase, convertArbitraryBaseBigIntVersion }
// '201'

View File

@ -1,4 +1,4 @@
import { convertArbitraryBase } from '../ArbitraryBase' import { convertArbitraryBase, convertArbitraryBaseBigIntVersion } from '../ArbitraryBase'
test('Check the answer of convertArbitraryBase(98, 0123456789, 01234567) is 142', () => { test('Check the answer of convertArbitraryBase(98, 0123456789, 01234567) is 142', () => {
const res = convertArbitraryBase('98', '0123456789', '01234567') const res = convertArbitraryBase('98', '0123456789', '01234567')
@ -34,3 +34,23 @@ test('Check the answer of convertArbitraryBase(111, 0123456789, abcdefgh) is bfh
const res = convertArbitraryBase('111', '0123456789', 'abcdefgh') const res = convertArbitraryBase('111', '0123456789', 'abcdefgh')
expect(res).toBe('bfh') expect(res).toBe('bfh')
}) })
// Output digits outside the Basic Multilingual Plane must be treated as single symbols.
test('Unicode awareness', () => {
  const emojiDigits = '💝🎸🦄'
  expect(convertArbitraryBase('98', '0123456789', emojiDigits)).toBe('🎸💝🎸🦄🦄')
})
// Zero must map to the first character of the output set, not to an empty string.
test('zero', () => {
  const converted = convertArbitraryBase('0', '0123456789', 'abc')
  const expected = 'a'
  expect(converted).toBe(expected)
})
// Cross-checks the BigInt converter against the built-in BigInt#toString(36)
// using a value far beyond Number.MAX_SAFE_INTEGER.
test('BigInt version with input string of arbitrary length', () => {
  const googol = 10n ** 100n
  const decimalDigits = '0123456789'
  const base36Digits = '0123456789abcdefghijklmnopqrstuvwxyz'
  const converted = convertArbitraryBaseBigIntVersion(String(googol), decimalDigits, base36Digits)
  expect(converted).toBe(googol.toString(36))
})