diff --git a/dict/.gitignore b/dict/.gitignore index 18e00d2..1a06b16 100644 --- a/dict/.gitignore +++ b/dict/.gitignore @@ -1,2 +1,3 @@ JMdict.xml +JmdictFurigana.txt dict.bin \ No newline at end of file diff --git a/dict/README.md b/dict/README.md index bb0c063..7518875 100644 --- a/dict/README.md +++ b/dict/README.md @@ -9,6 +9,13 @@ Its primary goals are: ### Configuration +#### JMdict + 1. [Download the latest version of JMdict](https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project#CURRENT_VERSION_&_DOWNLOAD). For development, download the file with only English glosses, which will substantially decrease parsing time. 2. Extract the archive -3. Rename the file to JMDict.xml \ No newline at end of file +3. Rename the file to JMDict.xml + +#### JmdictFurigana + +1. [Download the latest version of JmdictFurigana.txt](https://github.com/Doublevil/JmdictFurigana/releases) +2. Ensure it's named JmdictFurigana.txt. \ No newline at end of file diff --git a/dict/main.go b/dict/main.go index 073ab2e..7ac5f0c 100644 --- a/dict/main.go +++ b/dict/main.go @@ -1,12 +1,14 @@ package main import ( + "bufio" "encoding/gob" "encoding/json" "fmt" "log" "net/http" "os" + "strconv" "strings" "foosoft.net/projects/jmdict" @@ -41,9 +43,30 @@ func LoadDict() error { if err != nil { return err } + + const jmdictFuriganaFile = "JmdictFurigana.txt" + reader, err = os.Open(jmdictFuriganaFile) + if err != nil { + return err + } + scanner := bufio.NewScanner(reader) + furiganaData := make(map[string]string) + for scanner.Scan() { + params := strings.Split(scanner.Text(), "|") + furiganaData[params[0]] = params[2] + } + dict = make(map[string]Entry) for _, jmdictEntry := range jmdict.Entries { - entry := ParseEntry(&jmdictEntry) + // お願い致します|おねがいいたします|1:ねが;3:いた + var furiganaInfo *string + if len(jmdictEntry.Kanji) > 0 { + data := furiganaData[jmdictEntry.Kanji[0].Expression] + furiganaInfo = &data + } else { + furiganaInfo = nil + } + entry := ParseEntry(&jmdictEntry, furiganaInfo) dict[entry.Kanji] = entry } @@ -62,8 +85,15 @@ func LoadDict() error { return nil } +type Furigana struct { + Kanji string + Furigana string +} + type Entry struct { - Kanji string + Kanji string + // Mapping of character index to furigana + Furigana []Furigana Reading string Definitions []Definition } @@ -73,7 +103,7 @@ type Definition struct { PartOfSpeech []string } -func ParseEntry(entry *jmdict.JmdictEntry) Entry { +func ParseEntry(entry *jmdict.JmdictEntry, furiganaInfo *string) Entry { kanji := "" if len(entry.Kanji) > 0 { kanji = entry.Kanji[0].Expression @@ -96,8 +126,64 @@ func ParseEntry(entry *jmdict.JmdictEntry) Entry { PartOfSpeech: sense.PartsOfSpeech, } } + // 1:ねが;3:いた + var furiganaList []Furigana + if kanji == "" || *furiganaInfo == "" { + furiganaList = []Furigana{{Kanji: reading, Furigana: ""}} + } else { + furiganaEntries := strings.Split(*furiganaInfo, ";") + // ["1:ねが", "3:いた"] + type rawFurigana struct { + from int + to int + furigana string + } + ruby := make([]rawFurigana, 0) + for _, entry := range furiganaEntries { + // 1:ねが + // multiple: 0-1:きょう + params := strings.Split(entry, ":") + // ["1", "ねが"] + // multiple: ["0-1", "きょう"] + indexRange := strings.Split(params[0], "-") + // [1] + // multiple: [0, 1] + var from, to int + if len(indexRange) == 1 { + index, _ := strconv.Atoi(indexRange[0]) + from, to = index, index + } else { + from, _ = strconv.Atoi(indexRange[0]) + to, _ = strconv.Atoi(indexRange[1]) + } + ruby = append(ruby, rawFurigana{ + from: from, + to: to, + furigana: params[1], + }) + } + furiganaList = make([]Furigana, 0) + slice := func(from, to int) string { + return string([]rune(kanji)[from : to+1]) + } + nextIndex := 0 + for _, raw := range ruby { + if raw.from > nextIndex { + furiganaList = append(furiganaList, Furigana{ + Kanji: slice(nextIndex, raw.from-1), + Furigana: "", + }) + } + furiganaList = append(furiganaList, Furigana{ + Kanji: slice(raw.from, raw.to), + Furigana: raw.furigana, + }) + nextIndex = raw.to + 1 + } + } return Entry{ Kanji: kanji, + Furigana: furiganaList, Reading: reading, Definitions: definitions, } diff --git a/dict/templates/partials/entry.html b/dict/templates/partials/entry.html index 9d74569..de3845e 100644 --- a/dict/templates/partials/entry.html +++ b/dict/templates/partials/entry.html @@ -1,11 +1,15 @@ {{ define "entry" }}
-

- {{- if .Kanji -}} - {{- .Kanji -}}({{- .Reading -}}) - {{- else -}} - {{- .Reading -}} +

+ + {{- range .Furigana -}} + {{- if .Furigana -}} + {{- .Kanji -}}({{- .Furigana -}}) + {{- else -}} + {{- .Kanji -}} + {{- end -}} {{- end -}} +

{{- $count := len .Definitions -}} {{ if eq $count 1 -}} diff --git a/dict/templates/partials/entryfull.html b/dict/templates/partials/entryfull.html index 8488cbc..da28fbf 100644 --- a/dict/templates/partials/entryfull.html +++ b/dict/templates/partials/entryfull.html @@ -1,11 +1,15 @@ {{ define "entryfull" }}
-

- {{- if .Kanji -}} - {{- .Kanji -}}({{- .Reading -}}) - {{- else -}} - {{- .Reading -}} +

+ + {{- range .Furigana -}} + {{- if .Furigana -}} + {{- .Kanji -}}({{- .Furigana -}}) + {{- else -}} + {{- .Kanji -}} + {{- end -}} {{- end -}} +

{{- $count := len .Definitions -}} {{ if eq $count 1 -}}