/*
Given a data set of an unknown size,
Get a random sample in a random order
It's used in data analytics, often as a way to get a small random sample from a data lake or warehouse, or from a large CSV file
*/

/**
 * Draws a random sample of at most `sampleSize` values from a dataset whose
 * length is not known in advance, reading the dataset exactly once.
 * @param {Iterable.<T>|Iterator.<T>} datasetSource The source of data. Either an
 * iterator (anything with a `next()` method, e.g. a generator object) or an
 * iterable (array, Set, string, ...)
 * @param {number} sampleSize The maximum number of values to return. Must be a
 * non-negative integer
 * @returns {Array.<T>} The random sample. Shorter than `sampleSize` when the
 * dataset does not contain enough values
 * @throws {RangeError} If `sampleSize` is not a non-negative integer
 * @throws {TypeError} If `datasetSource` is neither an iterator nor an iterable
 * @template T
 */
function shuf (datasetSource, sampleSize) {
  // Resolve the iterator once so both phases read from the same stream;
  // an iterable such as an array would otherwise be restarted by each phase
  const iterator = toIterator(datasetSource)
  const output = fillBaseSample(iterator, sampleSize)

  return randomizeOutputFromDataset(iterator, output)
}

/**
 * Returns an iterator for the given source
 * @param {Iterable.<T>|Iterator.<T>} source An iterator or an iterable
 * @returns {Iterator.<T>} `source` itself when it already has a `next()` method,
 * otherwise the iterator obtained from `source[Symbol.iterator]()`
 * @throws {TypeError} If `source` is neither an iterator nor an iterable
 * @template T
 */
function toIterator (source) {
  if (source !== null && source !== undefined) {
    // Checked first so that everything that already worked (objects exposing
    // `next`) is used untouched
    if (typeof source.next === 'function') return source
    if (typeof source[Symbol.iterator] === 'function') {
      return source[Symbol.iterator]()
    }
  }
  throw new TypeError('datasetSource must be an iterator or an iterable')
}

/**
 * Fills the output if possible, with the minimum number of values.
 * Reads at most `sampleSize` values from the source and returns them in a
 * uniformly random order.
 * @param {Iterable.<T>|Iterator.<T>} datasetSource The source of data
 * @param {number} sampleSize The size of the sample to extract from the dataset
 * @returns {Array.<T>} The base sample, as an array. Shorter than `sampleSize`
 * when the dataset runs out first
 * @throws {RangeError} If `sampleSize` is not a non-negative integer
 * @throws {TypeError} If `datasetSource` is neither an iterator nor an iterable
 * @template T
 */
function fillBaseSample (datasetSource, sampleSize) {
  if (!Number.isInteger(sampleSize) || sampleSize < 0) {
    throw new RangeError('sampleSize must be a non-negative integer')
  }

  const iterator = toIterator(datasetSource)
  const output = []

  // The size is checked before reading, so nothing is consumed from the source
  // beyond what ends up in the sample (and nothing at all for a size of 0)
  while (output.length < sampleSize) {
    const item = iterator.next()
    if (item.done) break

    // "Inside-out" Fisher-Yates shuffle: pick a slot among the values placed
    // so far plus one new slot at the end, move the current occupant (if any)
    // to the end and put the new value in its place
    const insertTo = Math.floor(Math.random() * (output.length + 1))
    if (insertTo === output.length) {
      output.push(item.value)
    } else {
      output.push(output[insertTo])
      output[insertTo] = item.value
    }
  }

  return output
}

/**
 * Replaces values in the output randomly with new ones from the dataset
 * (reservoir sampling). The n-th value read overall replaces a random entry
 * of the output with probability `output.length / n`.
 * @param {Iterable.<T>|Iterator.<T>} datasetSource The source of data, positioned
 * after the values that are already in `output`
 * @param {Array.<T>} output The output so far, filled with data. Not modified
 * @returns {Array.<T>} The random sample, as a new array
 * @throws {TypeError} If `output` is not empty and `datasetSource` is neither
 * an iterator nor an iterable
 * @template T
 */
function randomizeOutputFromDataset (datasetSource, output) {
  const newOutput = [...output]

  // Nothing can ever be replaced in an empty sample, so do not drain the
  // (possibly huge or endless) source for no result
  if (newOutput.length === 0) return newOutput

  const iterator = toIterator(datasetSource)
  let readSoFar = newOutput.length

  for (let item = iterator.next(); !item.done; item = iterator.next()) {
    readSoFar++

    const insertTo = Math.floor(Math.random() * readSoFar)
    if (insertTo < newOutput.length) {
      newOutput[insertTo] = item.value
    }
  }

  return newOutput
}

const main = () => {
  /**
   * Generates a random range of data, with non-negative integer values below 2^31 - 1
   * @param {number} length The number of data items to generate
   * @returns {Generator.<number>} Random iterable data
   */
  function * generateRandomData (length) {
    const maxValue = Math.pow(2, 31) - 1
    for (let i = 0; i < length; i++) {
      yield Math.floor(Math.random() * maxValue)
    }
  }

  const source = generateRandomData(1000)
  const result = shuf(source, 10)
  console.log(result)
}
main()