// source: https://en.wiktionary.org/wiki/Wiktionary:Frequency_lists/PG/2006/04/1-10000

// each word has a weight, which is the number of times it appears in the corpus
import * as WORDS_JSON from "../lib/words.json";
const WORDS = Array.from(WORDS_JSON);

// Sum of every weight in the list; random draws are scaled by this value.
const totalWeight = WORDS.reduce((sum, [, weight]) => sum + weight, 0);

// Prefix sums of the weights: entry i is [word, weight(0) + ... + weight(i)].
// `lastCumulativeWeight` is the running sum and ends up holding the grand total.
let lastCumulativeWeight = 0;
const wordsWithCumulativeWeight = [];
for (const [word, weight] of WORDS) {
  lastCumulativeWeight += weight;
  wordsWithCumulativeWeight.push([word, lastCumulativeWeight]);
}

/**
 * Picks a random word with probability proportional to its weight.
 *
 * A point is drawn uniformly from [0, totalWeight) and a binary search over
 * `wordsWithCumulativeWeight` finds the first entry whose cumulative weight is
 * strictly greater than that point. Word i therefore owns the half-open
 * interval [cumulative(i - 1), cumulative(i)), whose length is its own weight,
 * and an entry with zero weight owns an empty interval and is never chosen.
 *
 * Assumes the word list is non-empty; with an empty list the final lookup
 * throws.
 *
 * @returns {string} The selected word (assuming the word list stores words as
 *   strings).
 */
export const getRandomWord = () => {
  const randomWeight = Math.random() * totalWeight;
  let low = 0;
  let high = wordsWithCumulativeWeight.length - 1;
  // Invariant: the answer is in [low, high]. The final cumulative weight is
  // the total, and the draw is below the total because Math.random() < 1.
  while (low < high) {
    const mid = Math.floor((low + high) / 2);
    const cumulativeWeight = wordsWithCumulativeWeight[mid][1];
    if (cumulativeWeight > randomWeight) {
      // `mid` may itself be the answer, so keep it in range.
      high = mid;
    } else {
      // Covers equality too: a draw landing exactly on a boundary belongs to
      // the next word, because each interval excludes its upper end.
      low = mid + 1;
    }
  }
  return wordsWithCumulativeWeight[low][0];
};
