import { googleNgramTop10000Plus } from "./gwords"
import * as _ from 'lodash'
import { britishToAmerican } from "./dialects"
import { exclusions } from "./exclusions"

/**
 * Minimal contract shared by every dictionary in this file: a source that
 * hands out one word per call.
 */
interface RawDictionary {
    /** Returns a single word chosen by the implementation. */
    sampleOne(): string
}

/**
 * Dictionary that draws words at random with probability proportional to a
 * per-word weight.
 */
class WeightedDictionary implements RawDictionary {
    /** Sum of every weight in `data`, computed once at construction. */
    private readonly frequencySum: number

    /**
     * @param data `[word, weight]` pairs. Entries with a weight of zero are
     *             never returned by {@link sampleOne}.
     */
    constructor(private readonly data: [string, number][]) {
        this.frequencySum = data.reduce((total, [, frequency]) => total + frequency, 0)
    }

    /**
     * Picks one word, weighted by its frequency.
     *
     * A random point is chosen in `[0, frequencySum)` and the weights are
     * subtracted in order until the running value drops below zero; the word
     * whose weight crossed zero is returned.
     *
     * @returns the sampled word
     * @throws RangeError if the dictionary is empty or its total weight is
     *         not a positive number, since there is nothing to sample.
     */
    sampleOne(): string {
        // `!(x > 0)` also rejects NaN, which a plain `x <= 0` would let through.
        if (!(this.frequencySum > 0)) {
            throw new RangeError("WeightedDictionary cannot sample: total weight must be positive")
        }

        let position = Math.random() * this.frequencySum
        let lastWeighted: string | undefined
        for (const [word, frequency] of this.data) {
            if (frequency > 0) {
                lastWeighted = word
            }
            position -= frequency
            if (position < 0) {
                return word
            }
        }

        // Floating-point rounding can leave `position` at zero (or a tiny
        // positive value) after every weight has been subtracted. Instead of
        // indexing past the end of the array, settle on the last word that
        // actually carries weight.
        if (lastWeighted === undefined) {
            throw new RangeError("WeightedDictionary cannot sample: no entry has a positive weight")
        }
        return lastWeighted
    }
}

/**
 * Decorates another dictionary so that words contained in a fixed exclusion
 * set are never handed out.
 */
class FilteredDictionary implements RawDictionary {
    constructor(private underlying: RawDictionary, private exclusions: Set<string>) { }

    /**
     * Draws from the wrapped dictionary repeatedly and returns the first word
     * that is not in the exclusion set. Does not terminate if the wrapped
     * dictionary only ever yields excluded words.
     */
    sampleOne() {
        for (; ;) {
            const candidate = this.underlying.sampleOne()
            if (!this.exclusions?.has(candidate)) {
                return candidate
            }
        }
    }
}

/**
 * Public sampling facade over a {@link RawDictionary}, adding per-call
 * exclusions and multi-word sampling.
 */
export class Dictionary implements RawDictionary {
    constructor(private underlying: RawDictionary) { }

    /**
     * Draws one word from the wrapped dictionary, redrawing for as long as
     * the word is contained in `exclusions`.
     *
     * @param exclusions optional set of words that must not be returned
     * @returns the first drawn word that is not excluded
     */
    sampleOne(exclusions?: Set<string>): string {
        let candidate = this.underlying.sampleOne()
        while (exclusions?.has(candidate)) {
            candidate = this.underlying.sampleOne()
        }
        return candidate
    }

    /**
     * Collects distinct words until `n` of them have been gathered.
     *
     * Duplicates are discarded and drawing continues, so this does not
     * terminate if fewer than `n` distinct non-excluded words can be drawn.
     *
     * @param n number of distinct words wanted
     * @param exclusions optional set of words that must not appear in the result
     * @returns the words in the order they were first drawn
     */
    sample(n: number, exclusions?: Set<string>): string[] {
        const picked = new Set<string>()
        for (; picked.size < n;) {
            picked.add(this.sampleOne(exclusions))
        }
        return Array.from(picked)
    }
}

// TODO: make UK & US dialects

/**
 * Samples from `googleNgramTop10000Plus` weighted by each entry's number,
 * never returning a word listed in `exclusions`.
 */
export const weightedEnglishSuperset = new Dictionary(
    new FilteredDictionary(
        new WeightedDictionary(googleNgramTop10000Plus),
        exclusions
    )
)

/**
 * Same distribution as `weightedEnglishSuperset`, but additionally rejects
 * every string that appears anywhere in `britishToAmerican` once flattened.
 * NOTE(review): presumably this drops words whose spelling differs between
 * dialects — confirm against the shape of `britishToAmerican` in ./dialects.
 */
export const weightedEnglishSubset = new Dictionary(
    new FilteredDictionary(
        weightedEnglishSuperset,
        new Set(_.flatten(britishToAmerican))
    )
)