Add proper furigana

main
Elnu 2 years ago
parent 91fb5d9e3b
commit 2b85099901

1
dict/.gitignore vendored

@ -1,2 +1,3 @@
JMdict.xml JMdict.xml
JmdictFurigana.txt
dict.bin dict.bin

@ -9,6 +9,13 @@ Its primary goals are:
### Configuration ### Configuration
#### JMdict
1. [Download the latest version of JMdict](https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project#CURRENT_VERSION_&_DOWNLOAD). For development, download the file with only English glosses, which will substantially decrease parsing time. 1. [Download the latest version of JMdict](https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project#CURRENT_VERSION_&_DOWNLOAD). For development, download the file with only English glosses, which will substantially decrease parsing time.
2. Extract the archive 2. Extract the archive
3. Rename the file to JMdict.xml 3. Rename the file to JMdict.xml
#### JmdictFurigana
1. [Download the latest version of JmdictFurigana.txt](https://github.com/Doublevil/JmdictFurigana/releases)
2. Ensure it's named JmdictFurigana.txt.

@ -1,12 +1,14 @@
package main package main
import ( import (
"bufio"
"encoding/gob" "encoding/gob"
"encoding/json" "encoding/json"
"fmt" "fmt"
"log" "log"
"net/http" "net/http"
"os" "os"
"strconv"
"strings" "strings"
"foosoft.net/projects/jmdict" "foosoft.net/projects/jmdict"
@ -41,9 +43,30 @@ func LoadDict() error {
if err != nil { if err != nil {
return err return err
} }
// Load the furigana alignment table. Each usable line has at least three
// "|"-separated fields, for example:
//   お願い致します|おねがいいたします|1:ねが;3:いた
// The first field is the written form and the third is the alignment string
// later handed to ParseEntry.
const jmdictFuriganaFile = "JmdictFurigana.txt"
reader, err = os.Open(jmdictFuriganaFile)
if err != nil {
	return err
}
// NOTE(review): the opened file is not closed anywhere in the visible code;
// confirm whether reader should be closed once scanning is done.
scanner := bufio.NewScanner(reader)
// Keyed by written form only, so a later line with the same written form
// overwrites an earlier one.
furiganaData := make(map[string]string)
for scanner.Scan() {
	params := strings.Split(scanner.Text(), "|")
	if len(params) < 3 {
		// Blank or malformed line: skip it instead of panicking on params[2].
		continue
	}
	furiganaData[params[0]] = params[2]
}
// Scan returns false on both EOF and read errors; surface the latter so a
// truncated table is not silently used.
if err := scanner.Err(); err != nil {
	return fmt.Errorf("reading %s: %w", jmdictFuriganaFile, err)
}
dict = make(map[string]Entry) dict = make(map[string]Entry)
for _, jmdictEntry := range jmdict.Entries { for _, jmdictEntry := range jmdict.Entries {
entry := ParseEntry(&jmdictEntry) // お願い致します|おねがいいたします|1:ねが;3:いた
var furiganaInfo *string
if len(jmdictEntry.Kanji) > 0 {
data := furiganaData[jmdictEntry.Kanji[0].Expression]
furiganaInfo = &data
} else {
furiganaInfo = nil
}
entry := ParseEntry(&jmdictEntry, furiganaInfo)
dict[entry.Kanji] = entry dict[entry.Kanji] = entry
} }
@ -62,8 +85,15 @@ func LoadDict() error {
return nil return nil
} }
// Furigana is one display segment of an entry's headword: a run of base text
// plus the reading to render above it.
type Furigana struct {
// Kanji is the base text of the segment, printed on the baseline by the
// entry templates.
Kanji string
// Furigana is the ruby text for the segment. When it is empty the templates
// print Kanji as plain text with no <ruby> markup.
Furigana string
}
type Entry struct { type Entry struct {
Kanji string Kanji string
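// Headword split into display segments, each paired with its furigana
// (an empty Furigana means the segment is rendered without ruby text)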
Furigana []Furigana
Reading string Reading string
Definitions []Definition Definitions []Definition
} }
@ -73,7 +103,7 @@ type Definition struct {
PartOfSpeech []string PartOfSpeech []string
} }
func ParseEntry(entry *jmdict.JmdictEntry) Entry { func ParseEntry(entry *jmdict.JmdictEntry, furiganaInfo *string) Entry {
kanji := "" kanji := ""
if len(entry.Kanji) > 0 { if len(entry.Kanji) > 0 {
kanji = entry.Kanji[0].Expression kanji = entry.Kanji[0].Expression
@ -96,8 +126,64 @@ func ParseEntry(entry *jmdict.JmdictEntry) Entry {
PartOfSpeech: sense.PartsOfSpeech, PartOfSpeech: sense.PartsOfSpeech,
} }
} }
// 1:ねが;3:いた
var furiganaList []Furigana
if kanji == "" || *furiganaInfo == "" {
furiganaList = []Furigana{{Kanji: reading, Furigana: ""}}
} else {
furiganaEntries := strings.Split(*furiganaInfo, ";")
// ["1:ねが", "3:いた"]
type rawFurigana struct {
from int
to int
furigana string
}
ruby := make([]rawFurigana, 0)
for _, entry := range furiganaEntries {
// 1:ねが
// multiple: 0-1:きょう
params := strings.Split(entry, ":")
// ["1", "ねが"]
// multiple: ["0-1", "きょう"]
indexRange := strings.Split(params[0], "-")
// [1]
// multiple: [0, 1]
var from, to int
if len(indexRange) == 1 {
index, _ := strconv.Atoi(indexRange[0])
from, to = index, index
} else {
from, _ = strconv.Atoi(indexRange[0])
to, _ = strconv.Atoi(indexRange[1])
}
ruby = append(ruby, rawFurigana{
from: from,
to: to,
furigana: params[1],
})
}
furiganaList = make([]Furigana, 0)
slice := func(from, to int) string {
return string([]rune(kanji)[from : to+1])
}
nextIndex := 0
for _, raw := range ruby {
if raw.from > nextIndex {
furiganaList = append(furiganaList, Furigana{
Kanji: slice(nextIndex, raw.from-1),
Furigana: "",
})
}
furiganaList = append(furiganaList, Furigana{
Kanji: slice(raw.from, raw.to),
Furigana: raw.furigana,
})
nextIndex = raw.to + 1
}
}
return Entry{ return Entry{
Kanji: kanji, Kanji: kanji,
Furigana: furiganaList,
Reading: reading, Reading: reading,
Definitions: definitions, Definitions: definitions,
} }

@ -1,11 +1,15 @@
{{ define "entry" }} {{ define "entry" }}
<div class="box"> <div class="box">
<h3> <h3 lang="ja">
{{- if .Kanji -}} <a href="/word/{{ .Kanji }}">
<a href="/word/{{ .Kanji }}"><ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Reading -}}</rt><rp>)</rp></ruby></a> {{- range .Furigana -}}
{{- if .Furigana -}}
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Furigana -}}</rt><rp>)</rp></ruby>
{{- else -}} {{- else -}}
{{- .Reading -}} {{- .Kanji -}}
{{- end -}} {{- end -}}
{{- end -}}
</a>
</h3> </h3>
{{- $count := len .Definitions -}} {{- $count := len .Definitions -}}
{{ if eq $count 1 -}} {{ if eq $count 1 -}}

@ -1,11 +1,15 @@
{{ define "entryfull" }} {{ define "entryfull" }}
<div class="box"> <div class="box">
<h3> <h3 lang="ja">
{{- if .Kanji -}} <a href="/word/{{ .Kanji }}">
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Reading -}}</rt><rp>)</rp></ruby> {{- range .Furigana -}}
{{- if .Furigana -}}
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Furigana -}}</rt><rp>)</rp></ruby>
{{- else -}} {{- else -}}
{{- .Reading -}} {{- .Kanji -}}
{{- end -}} {{- end -}}
{{- end -}}
</a>
</h3> </h3>
{{- $count := len .Definitions -}} {{- $count := len .Definitions -}}
{{ if eq $count 1 -}} {{ if eq $count 1 -}}