Add proper furigana

main
Elnu 2 years ago
parent 91fb5d9e3b
commit 2b85099901

1
dict/.gitignore vendored

@ -1,2 +1,3 @@
JMdict.xml
JmdictFurigana.txt
dict.bin

@ -9,6 +9,13 @@ Its primary goals are:
### Configuration
#### JMdict
1. [Download the latest version of JMdict](https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project#CURRENT_VERSION_&_DOWNLOAD). For development, download the file with only English glosses, which will substantially decrease parsing time.
2. Extract the archive.
3. Rename the extracted file to `JMdict.xml` (the name is case-sensitive on most file systems).
#### JmdictFurigana
1. [Download the latest version of JmdictFurigana.txt](https://github.com/Doublevil/JmdictFurigana/releases)
2. Ensure it's named JmdictFurigana.txt.

@ -1,12 +1,14 @@
package main
import (
"bufio"
"encoding/gob"
"encoding/json"
"fmt"
"log"
"net/http"
"os"
"strconv"
"strings"
"foosoft.net/projects/jmdict"
@ -41,9 +43,30 @@ func LoadDict() error {
if err != nil {
return err
}
// Load the furigana annotations that accompany JMdict. Each record is a
// '|'-separated line, for example:
//   お願い致します|おねがいいたします|1:ねが;3:いた
// (written form | reading | furigana segments).
const jmdictFuriganaFile = "JmdictFurigana.txt"
reader, err = os.Open(jmdictFuriganaFile)
if err != nil {
	return err
}
// NOTE(review): the opened file is never closed in the visible code; consider
// closing it once scanning is done — confirm how reader is declared first.
scanner := bufio.NewScanner(reader)
// furiganaData maps a written form to its raw furigana segment string.
// NOTE(review): the key is the written form only, so records that share a
// written form but differ in reading overwrite each other — confirm this is
// acceptable.
furiganaData := make(map[string]string)
for scanner.Scan() {
	params := strings.Split(scanner.Text(), "|")
	// Skip blank or malformed lines instead of panicking on params[2].
	if len(params) < 3 {
		continue
	}
	furiganaData[params[0]] = params[2]
}
// A scanner stops silently on read errors; surface them so a truncated
// furigana table is not mistaken for a complete one.
if err = scanner.Err(); err != nil {
	return err
}
// Build the in-memory dictionary from the parsed JMdict entries.
// NOTE(review): entries are keyed by Entry.Kanji, so every entry without a
// kanji form is stored under "" and entries sharing a first kanji expression
// replace one another — confirm this is intended.
dict = make(map[string]Entry)
for _, jmdictEntry := range jmdict.Entries {
	// Furigana is looked up by the entry's first kanji expression; the stored
	// value has the form "1:ねが;3:いた". The pointer stays nil for entries that
	// have no kanji form, and points at "" when no furigana record exists.
	var furiganaInfo *string
	if len(jmdictEntry.Kanji) > 0 {
		data := furiganaData[jmdictEntry.Kanji[0].Expression]
		furiganaInfo = &data
	}
	entry := ParseEntry(&jmdictEntry, furiganaInfo)
	dict[entry.Kanji] = entry
}
@ -62,8 +85,15 @@ func LoadDict() error {
return nil
}
// Furigana is one consecutive segment of a headword. Kanji holds the segment's
// text as written (it may also be plain kana), and Furigana holds the reading
// annotation rendered above it, or "" when the segment carries no annotation.
type Furigana struct {
	Kanji, Furigana string
}
type Entry struct {
Kanji string
Kanji string
// Mapping of character index to furigana
Furigana []Furigana
Reading string
Definitions []Definition
}
@ -73,7 +103,7 @@ type Definition struct {
PartOfSpeech []string
}
func ParseEntry(entry *jmdict.JmdictEntry) Entry {
func ParseEntry(entry *jmdict.JmdictEntry, furiganaInfo *string) Entry {
kanji := ""
if len(entry.Kanji) > 0 {
kanji = entry.Kanji[0].Expression
@ -96,8 +126,64 @@ func ParseEntry(entry *jmdict.JmdictEntry) Entry {
PartOfSpeech: sense.PartsOfSpeech,
}
}
// 1:ねが;3:いた
var furiganaList []Furigana
if kanji == "" || *furiganaInfo == "" {
furiganaList = []Furigana{{Kanji: reading, Furigana: ""}}
} else {
furiganaEntries := strings.Split(*furiganaInfo, ";")
// ["1:ねが", "3:いた"]
type rawFurigana struct {
from int
to int
furigana string
}
ruby := make([]rawFurigana, 0)
for _, entry := range furiganaEntries {
// 1:ねが
// multiple: 0-1:きょう
params := strings.Split(entry, ":")
// ["1", "ねが"]
// multiple: ["0-1", "きょう"]
indexRange := strings.Split(params[0], "-")
// [1]
// multiple: [0, 1]
var from, to int
if len(indexRange) == 1 {
index, _ := strconv.Atoi(indexRange[0])
from, to = index, index
} else {
from, _ = strconv.Atoi(indexRange[0])
to, _ = strconv.Atoi(indexRange[1])
}
ruby = append(ruby, rawFurigana{
from: from,
to: to,
furigana: params[1],
})
}
furiganaList = make([]Furigana, 0)
slice := func(from, to int) string {
return string([]rune(kanji)[from : to+1])
}
nextIndex := 0
for _, raw := range ruby {
if raw.from > nextIndex {
furiganaList = append(furiganaList, Furigana{
Kanji: slice(nextIndex, raw.from-1),
Furigana: "",
})
}
furiganaList = append(furiganaList, Furigana{
Kanji: slice(raw.from, raw.to),
Furigana: raw.furigana,
})
nextIndex = raw.to + 1
}
}
return Entry{
Kanji: kanji,
Furigana: furiganaList,
Reading: reading,
Definitions: definitions,
}

@ -1,11 +1,15 @@
{{ define "entry" }}
<div class="box">
<h3>
{{- if .Kanji -}}
<a href="/word/{{ .Kanji }}"><ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Reading -}}</rt><rp>)</rp></ruby></a>
{{- else -}}
{{- .Reading -}}
<h3 lang="ja">
<a href="/word/{{ .Kanji }}">
{{- range .Furigana -}}
{{- if .Furigana -}}
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Furigana -}}</rt><rp>)</rp></ruby>
{{- else -}}
{{- .Kanji -}}
{{- end -}}
{{- end -}}
</a>
</h3>
{{- $count := len .Definitions -}}
{{ if eq $count 1 -}}

@ -1,11 +1,15 @@
{{ define "entryfull" }}
<div class="box">
<h3>
{{- if .Kanji -}}
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Reading -}}</rt><rp>)</rp></ruby>
{{- else -}}
{{- .Reading -}}
<h3 lang="ja">
<a href="/word/{{ .Kanji }}">
{{- range .Furigana -}}
{{- if .Furigana -}}
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Furigana -}}</rt><rp>)</rp></ruby>
{{- else -}}
{{- .Kanji -}}
{{- end -}}
{{- end -}}
</a>
</h3>
{{- $count := len .Definitions -}}
{{ if eq $count 1 -}}