Add proper furigana

main
Elnu 2 years ago
parent 91fb5d9e3b
commit 2b85099901

1
dict/.gitignore vendored

@ -1,2 +1,3 @@
JMdict.xml
JmdictFurigana.txt
dict.bin

@ -9,6 +9,13 @@ Its primary goals are:
### Configuration
#### JMdict
1. [Download the latest version of JMdict](https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project#CURRENT_VERSION_&_DOWNLOAD). For development, download the file with only English glosses, which will substantially decrease parsing time.
2. Extract the archive
3. Rename the file to JMDict.xml
3. Rename the file to JMDict.xml
#### JmdictFurigana
1. [Download the latest version of JmdictFurigana.txt](https://github.com/Doublevil/JmdictFurigana/releases)
2. Make sure the downloaded file is named exactly `JmdictFurigana.txt`; this is the file name the dictionary loader opens.

@ -1,12 +1,14 @@
package main
import (
"bufio"
"encoding/gob"
"encoding/json"
"fmt"
"log"
"net/http"
"os"
"strconv"
"strings"
"foosoft.net/projects/jmdict"
@ -41,9 +43,30 @@ func LoadDict() error {
if err != nil {
return err
}
const jmdictFuriganaFile = "JmdictFurigana.txt"
reader, err = os.Open(jmdictFuriganaFile)
if err != nil {
return err
}
scanner := bufio.NewScanner(reader)
furiganaData := make(map[string]string)
for scanner.Scan() {
params := strings.Split(scanner.Text(), "|")
furiganaData[params[0]] = params[2]
}
dict = make(map[string]Entry)
for _, jmdictEntry := range jmdict.Entries {
entry := ParseEntry(&jmdictEntry)
// お願い致します|おねがいいたします|1:ねが;3:いた
var furiganaInfo *string
if len(jmdictEntry.Kanji) > 0 {
data := furiganaData[jmdictEntry.Kanji[0].Expression]
furiganaInfo = &data
} else {
furiganaInfo = nil
}
entry := ParseEntry(&jmdictEntry, furiganaInfo)
dict[entry.Kanji] = entry
}
@ -62,8 +85,15 @@ func LoadDict() error {
return nil
}
// Furigana is one display segment of a headword: a run of base text together
// with the reading, if any, that is rendered above it as ruby text.
type Furigana struct {
	// Kanji is the base text of the segment and Furigana is its reading.
	// Furigana is left empty for segments that carry no annotation.
	Kanji, Furigana string
}
type Entry struct {
Kanji string
Kanji string
// Mapping of character index to furigana
Furigana []Furigana
Reading string
Definitions []Definition
}
@ -73,7 +103,7 @@ type Definition struct {
PartOfSpeech []string
}
func ParseEntry(entry *jmdict.JmdictEntry) Entry {
func ParseEntry(entry *jmdict.JmdictEntry, furiganaInfo *string) Entry {
kanji := ""
if len(entry.Kanji) > 0 {
kanji = entry.Kanji[0].Expression
@ -96,8 +126,64 @@ func ParseEntry(entry *jmdict.JmdictEntry) Entry {
PartOfSpeech: sense.PartsOfSpeech,
}
}
// 1:ねが;3:いた
var furiganaList []Furigana
if kanji == "" || *furiganaInfo == "" {
furiganaList = []Furigana{{Kanji: reading, Furigana: ""}}
} else {
furiganaEntries := strings.Split(*furiganaInfo, ";")
// ["1:ねが", "3:いた"]
type rawFurigana struct {
from int
to int
furigana string
}
ruby := make([]rawFurigana, 0)
for _, entry := range furiganaEntries {
// 1:ねが
// multiple: 0-1:きょう
params := strings.Split(entry, ":")
// ["1", "ねが"]
// multiple: ["0-1", "きょう"]
indexRange := strings.Split(params[0], "-")
// [1]
// multiple: [0, 1]
var from, to int
if len(indexRange) == 1 {
index, _ := strconv.Atoi(indexRange[0])
from, to = index, index
} else {
from, _ = strconv.Atoi(indexRange[0])
to, _ = strconv.Atoi(indexRange[1])
}
ruby = append(ruby, rawFurigana{
from: from,
to: to,
furigana: params[1],
})
}
furiganaList = make([]Furigana, 0)
slice := func(from, to int) string {
return string([]rune(kanji)[from : to+1])
}
nextIndex := 0
for _, raw := range ruby {
if raw.from > nextIndex {
furiganaList = append(furiganaList, Furigana{
Kanji: slice(nextIndex, raw.from-1),
Furigana: "",
})
}
furiganaList = append(furiganaList, Furigana{
Kanji: slice(raw.from, raw.to),
Furigana: raw.furigana,
})
nextIndex = raw.to + 1
}
}
return Entry{
Kanji: kanji,
Furigana: furiganaList,
Reading: reading,
Definitions: definitions,
}

@ -1,11 +1,15 @@
{{ define "entry" }}
<div class="box">
<h3>
{{- if .Kanji -}}
<a href="/word/{{ .Kanji }}"><ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Reading -}}</rt><rp>)</rp></ruby></a>
{{- else -}}
{{- .Reading -}}
<h3 lang="ja">
<a href="/word/{{ .Kanji }}">
{{- range .Furigana -}}
{{- if .Furigana -}}
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Furigana -}}</rt><rp>)</rp></ruby>
{{- else -}}
{{- .Kanji -}}
{{- end -}}
{{- end -}}
</a>
</h3>
{{- $count := len .Definitions -}}
{{ if eq $count 1 -}}

@ -1,11 +1,15 @@
{{ define "entryfull" }}
<div class="box">
<h3>
{{- if .Kanji -}}
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Reading -}}</rt><rp>)</rp></ruby>
{{- else -}}
{{- .Reading -}}
<h3 lang="ja">
<a href="/word/{{ .Kanji }}">
{{- range .Furigana -}}
{{- if .Furigana -}}
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Furigana -}}</rt><rp>)</rp></ruby>
{{- else -}}
{{- .Kanji -}}
{{- end -}}
{{- end -}}
</a>
</h3>
{{- $count := len .Definitions -}}
{{ if eq $count 1 -}}