summaryrefslogtreecommitdiffstats
path: root/labb2/kod.py
blob: 378225f6c4912e5242ecd3a1630c4c2a0e87ee7e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from lib import minimum_edit_distance


def load():
    """Read the word-frequency table from ``alphabetical.csv``.

    The file is opened relative to the current working directory. Every
    non-blank line is expected to have the form ``word,frequency`` with an
    integer frequency; blank lines are skipped.

    Returns:
        A dict mapping each word (str) to its frequency (int).

    Raises:
        OSError: if ``alphabetical.csv`` cannot be opened.
        ValueError: if a non-blank line does not consist of exactly two
            comma-separated fields, or the frequency is not an integer.
    """
    words = {}
    # The context manager closes the file even when a line fails to parse.
    with open("alphabetical.csv", "r") as csv_file:
        # Iterate lazily instead of materialising every line with readlines().
        for line in csv_file:
            if not line.strip():
                continue  # tolerate blank lines, e.g. an empty last line
            word, freq = line.split(",")
            # int() ignores the trailing newline left on the frequency field.
            words[word] = int(freq)
    return words

def main(words):
    """Repeatedly ask the user for a word and autocomplete/-correct it.

    For every word typed, the results of autocomplete(), autocomplete_best()
    (all matches and the best three) and autocorrect() are printed. The loop
    ends when the user types "q" (in any letter case) or when the input
    stream is exhausted; the quit command itself is not looked up.

    Args:
        words: dict mapping words to their frequencies, passed through to
            the lookup functions.
    """
    while True:
        try:
            word = input("Type word: ").lower()
        except EOFError:
            # stdin was closed (Ctrl-D / piped input ran out): exit quietly
            # instead of ending with a traceback.
            break
        if word == "q":
            # Check the sentinel before doing any work so that quitting does
            # not trigger one more round of lookups for the letter "q".
            break
        print("Autocompletion finished: ", autocomplete(words, word))
        print("Sorted autocompletion: ", autocomplete_best(words, word))
        print("Best three: ", autocomplete_best(words, word, 3))
        print("Autocorrect: ", autocorrect(words, word))


def autocomplete(words, search_word):
    """Return the first key of *words* that begins with *search_word*.

    Keys are examined in the iteration order of *words*. None is returned
    when no key has the requested prefix.
    """
    candidates = (
        candidate
        for candidate in words.keys()
        if candidate.startswith(search_word)
    )
    # next() stops at the first hit and falls back to None when there is none.
    return next(candidates, None)


def autocomplete_best(words, search_word, amount=None):
    """Return the /amount/ most common autocompletions.

    Args:
        words: dict mapping words to their frequencies.
        search_word: prefix that every returned word must start with.
        amount: maximum number of words to return. None (the default)
            returns every match; 0 returns no words at all.

    Returns:
        The matching words joined by ", ", most frequent first. Words with
        equal frequency keep their relative order from *words*. The result
        is an empty string when nothing matches.
    """
    matches = [word for word in words if word.startswith(search_word)]
    # sorted() is stable even with reverse=True, so ties keep dict order.
    ranked = sorted(matches, key=words.get, reverse=True)
    # Slicing with None keeps the whole list, so no truthiness test on
    # amount is needed; a truthiness test would treat amount=0 as
    # "no limit" and return everything.
    return ", ".join(ranked[:amount])


def autocorrect(words, search_word):
    """Return the word with the smallest edit distance to *search_word*.

    The distance between *search_word* and each candidate is computed by
    ``minimum_edit_distance`` (imported from ``lib``; described by the
    original author as the Levenshtein distance).

    Args:
        words: dict whose keys are the candidate words.
        search_word: the (possibly misspelled) word to correct.

    Returns:
        The candidate with the smallest distance; when several candidates
        share that distance, the first one in the iteration order of
        *words* wins. None is returned when *words* is empty.
    """
    # min() calls the key once per candidate and keeps the first minimum;
    # default=None covers an empty word list instead of raising.
    return min(
        words,
        key=lambda candidate: minimum_edit_distance(search_word, candidate),
        default=None,
    )

# Script entry point: load the word list and start the interactive prompt.
# NOTE(review): this call is unguarded, so it also runs whenever the module
# is imported; consider an ``if __name__ == "__main__":`` guard if this file
# is ever imported from elsewhere.
main(load())