From 3d21311f120fe894b8c7a4279365fd17f0476d4c Mon Sep 17 00:00:00 2001 From: ElnuDev Date: Sat, 22 Jul 2023 20:32:05 -0700 Subject: [PATCH 1/2] Load dictionary into Entry map --- dict/main.go | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/dict/main.go b/dict/main.go index 5234d26..b234102 100644 --- a/dict/main.go +++ b/dict/main.go @@ -13,7 +13,7 @@ import ( "github.com/gorilla/mux" ) -var dict jmdict.Jmdict +var dict map[string]Entry func LoadDict() error { const jmdictFile = "JMdict.xml" @@ -21,10 +21,15 @@ func LoadDict() error { if err != nil { return err } - dict, _, err = jmdict.LoadJmdict(reader) + jmdict, _, err := jmdict.LoadJmdict(reader) if err != nil { return err } + dict = make(map[string]Entry) + for _, jmdictEntry := range jmdict.Entries { + entry := ParseEntry(&jmdictEntry) + dict[entry.Kanji] = entry + } return nil } @@ -39,7 +44,7 @@ type Definition struct { PartOfSpeech []string } -func ParseEntry(entry jmdict.JmdictEntry) Entry { +func ParseEntry(entry *jmdict.JmdictEntry) Entry { kanji := "" if len(entry.Kanji) > 0 { kanji = entry.Kanji[0].Expression @@ -75,26 +80,22 @@ func Search(query string) queryResult { otherResults := make([]Entry, 0) truncated := false count := 0 - for _, jmdictEntry := range dict.Entries { + for kanji := range dict { exactMatch := false - for _, kanji := range jmdictEntry.Kanji { - if kanji.Expression == query { - exactMatch = true - goto match - } - if strings.Contains(kanji.Expression, query) { - goto match - } + entry := dict[kanji] + if kanji == query { + exactMatch = true + goto match + } + if strings.Contains(kanji, query) { + goto match } // TODO: Skip if query contains kanji - for _, reading := range jmdictEntry.Readings { - if strings.Contains(reading.Reading, query) { - goto match - } + if strings.Contains(entry.Reading, query) { + goto match } continue match: - entry := ParseEntry(jmdictEntry) if exactMatch { exactResults = append(exactResults, entry) } else { @@ -116,9 +117,8 @@ func Search(query string) queryResult { } func Lookup(word string) *Entry { - for _, jmdictEntry := range dict.Entries { - entry := ParseEntry(jmdictEntry) - if entry.Kanji == word { + for kanji, entry := range dict { + if kanji == word { return &entry } } From 91fb5d9e3b5e49d2547a151290a3438362fcee57 Mon Sep 17 00:00:00 2001 From: ElnuDev Date: Sat, 22 Jul 2023 21:05:40 -0700 Subject: [PATCH 2/2] Implement dictionary data serialization to improve startup times --- dict/.gitignore | 3 ++- dict/main.go | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/dict/.gitignore b/dict/.gitignore index 30e4d8d..18e00d2 100644 --- a/dict/.gitignore +++ b/dict/.gitignore @@ -1 +1,2 @@ -JMdict.xml \ No newline at end of file +JMdict.xml +dict.bin \ No newline at end of file diff --git a/dict/main.go b/dict/main.go index b234102..073ab2e 100644 --- a/dict/main.go +++ b/dict/main.go @@ -1,6 +1,7 @@ package main import ( + "encoding/gob" "encoding/json" "fmt" "log" @@ -16,6 +17,21 @@ import ( var dict map[string]Entry func LoadDict() error { + // Loading from binary + const binaryFile = "dict.bin" + file, err := os.Open(binaryFile) + if err != nil { + if !os.IsNotExist(err) { + return err + } + } else { + defer file.Close() + decoder := gob.NewDecoder(file) + err = decoder.Decode(&dict) + return err + } + + // Loading from JMdict const jmdictFile = "JMdict.xml" reader, err := os.Open(jmdictFile) if err != nil { @@ -30,6 +46,19 @@ func LoadDict() error { entry := ParseEntry(&jmdictEntry) dict[entry.Kanji] = entry } + + // Encoding to binary + file, err = os.Create(binaryFile) + if err != nil { + return err + } + defer file.Close() + encoder := gob.NewEncoder(file) + err = encoder.Encode(&dict) + if err != nil { + return err + } + return nil }