Add proper furigana

This commit is contained in:
Elnu 2023-07-23 13:30:40 -07:00
parent 91fb5d9e3b
commit 2b85099901
5 changed files with 116 additions and 14 deletions

1
dict/.gitignore vendored
View file

@ -1,2 +1,3 @@
JMdict.xml
JmdictFurigana.txt
dict.bin

View file

@ -9,6 +9,13 @@ Its primary goals are:
### Configuration
#### JMdict
1. [Download the latest version of JMdict](https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project#CURRENT_VERSION_&_DOWNLOAD). For development, download the file with only English glosses, which will substantially decrease parsing time.
2. Extract the archive
3. Rename the file to JMdict.xml
3. Rename the file to JMdict.xml
#### JmdictFurigana
1. [Download the latest version of JmdictFurigana.txt](https://github.com/Doublevil/JmdictFurigana/releases)
2. Ensure it's named JmdictFurigana.txt.

View file

@ -1,12 +1,14 @@
package main
import (
"bufio"
"encoding/gob"
"encoding/json"
"fmt"
"log"
"net/http"
"os"
"strconv"
"strings"
"foosoft.net/projects/jmdict"
@ -41,9 +43,30 @@ func LoadDict() error {
if err != nil {
return err
}
const jmdictFuriganaFile = "JmdictFurigana.txt"
reader, err = os.Open(jmdictFuriganaFile)
if err != nil {
return err
}
scanner := bufio.NewScanner(reader)
furiganaData := make(map[string]string)
for scanner.Scan() {
params := strings.Split(scanner.Text(), "|")
furiganaData[params[0]] = params[2]
}
dict = make(map[string]Entry)
for _, jmdictEntry := range jmdict.Entries {
entry := ParseEntry(&jmdictEntry)
// お願い致します|おねがいいたします|1:ねが;3:いた
var furiganaInfo *string
if len(jmdictEntry.Kanji) > 0 {
data := furiganaData[jmdictEntry.Kanji[0].Expression]
furiganaInfo = &data
} else {
furiganaInfo = nil
}
entry := ParseEntry(&jmdictEntry, furiganaInfo)
dict[entry.Kanji] = entry
}
@ -62,8 +85,15 @@ func LoadDict() error {
return nil
}
// Furigana is one display segment of an entry's headword: a run of text plus
// the reading to show above it. ParseEntry builds these, and the entry
// templates wrap a segment in <ruby> only when its Furigana field is non-empty.
type Furigana struct {
// Kanji is the segment's base text. Despite the name it may hold kana: the
// unannotated text between annotated ranges, or the entire reading when the
// entry has no kanji form or no furigana data.
Kanji string
// Furigana is the reading for this segment, or "" when it has none.
Furigana string
}
// Entry is a single dictionary entry as built by ParseEntry. LoadDict stores
// each one in the dict map under its Kanji value.
type Entry struct {
// Kanji is the entry's first kanji expression, or "" when it has none.
Kanji string
Kanji string
// Furigana is the headword split into consecutive segments, each paired
// with its reading ("" for segments that carry no reading).
Furigana []Furigana
Reading string
Definitions []Definition
}
@ -73,7 +103,7 @@ type Definition struct {
PartOfSpeech []string
}
func ParseEntry(entry *jmdict.JmdictEntry) Entry {
func ParseEntry(entry *jmdict.JmdictEntry, furiganaInfo *string) Entry {
kanji := ""
if len(entry.Kanji) > 0 {
kanji = entry.Kanji[0].Expression
@ -96,8 +126,64 @@ func ParseEntry(entry *jmdict.JmdictEntry) Entry {
PartOfSpeech: sense.PartsOfSpeech,
}
}
// 1:ねが;3:いた
var furiganaList []Furigana
if kanji == "" || *furiganaInfo == "" {
furiganaList = []Furigana{{Kanji: reading, Furigana: ""}}
} else {
furiganaEntries := strings.Split(*furiganaInfo, ";")
// ["1:ねが", "3:いた"]
type rawFurigana struct {
from int
to int
furigana string
}
ruby := make([]rawFurigana, 0)
for _, entry := range furiganaEntries {
// 1:ねが
// multiple: 0-1:きょう
params := strings.Split(entry, ":")
// ["1", "ねが"]
// multiple: ["0-1", "きょう"]
indexRange := strings.Split(params[0], "-")
// [1]
// multiple: [0, 1]
var from, to int
if len(indexRange) == 1 {
index, _ := strconv.Atoi(indexRange[0])
from, to = index, index
} else {
from, _ = strconv.Atoi(indexRange[0])
to, _ = strconv.Atoi(indexRange[1])
}
ruby = append(ruby, rawFurigana{
from: from,
to: to,
furigana: params[1],
})
}
furiganaList = make([]Furigana, 0)
slice := func(from, to int) string {
return string([]rune(kanji)[from : to+1])
}
nextIndex := 0
for _, raw := range ruby {
if raw.from > nextIndex {
furiganaList = append(furiganaList, Furigana{
Kanji: slice(nextIndex, raw.from-1),
Furigana: "",
})
}
furiganaList = append(furiganaList, Furigana{
Kanji: slice(raw.from, raw.to),
Furigana: raw.furigana,
})
nextIndex = raw.to + 1
}
}
return Entry{
Kanji: kanji,
Furigana: furiganaList,
Reading: reading,
Definitions: definitions,
}

View file

@ -1,11 +1,15 @@
{{ define "entry" }}
<div class="box">
<h3>
{{- if .Kanji -}}
<a href="/word/{{ .Kanji }}"><ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Reading -}}</rt><rp>)</rp></ruby></a>
{{- else -}}
{{- .Reading -}}
<h3 lang="ja">
<a href="/word/{{ .Kanji }}">
{{- range .Furigana -}}
{{- if .Furigana -}}
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Furigana -}}</rt><rp>)</rp></ruby>
{{- else -}}
{{- .Kanji -}}
{{- end -}}
{{- end -}}
</a>
</h3>
{{- $count := len .Definitions -}}
{{ if eq $count 1 -}}

View file

@ -1,11 +1,15 @@
{{ define "entryfull" }}
<div class="box">
<h3>
{{- if .Kanji -}}
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Reading -}}</rt><rp>)</rp></ruby>
{{- else -}}
{{- .Reading -}}
<h3 lang="ja">
<a href="/word/{{ .Kanji }}">
{{- range .Furigana -}}
{{- if .Furigana -}}
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Furigana -}}</rt><rp>)</rp></ruby>
{{- else -}}
{{- .Kanji -}}
{{- end -}}
{{- end -}}
</a>
</h3>
{{- $count := len .Definitions -}}
{{ if eq $count 1 -}}