generated from ElnuDev/go-project
Add proper furigana
This commit is contained in:
parent
91fb5d9e3b
commit
2b85099901
5 changed files with 116 additions and 14 deletions
1
dict/.gitignore
vendored
1
dict/.gitignore
vendored
|
@ -1,2 +1,3 @@
|
|||
JMdict.xml
|
||||
JmdictFurigana.txt
|
||||
dict.bin
|
|
@ -9,6 +9,13 @@ Its primary goals are:
|
|||
|
||||
### Configuration
|
||||
|
||||
#### JMdict
|
||||
|
||||
1. [Download the latest version of JMdict](https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project#CURRENT_VERSION_&_DOWNLOAD). For development, download the file with only English glosses, which will substantially decrease parsing time.
|
||||
2. Extract the archive
|
||||
3. Rename the file to JMDict.xml
|
||||
3. Rename the file to JMdict.xml (the name is case-sensitive on most systems).
|
||||
|
||||
#### JmdictFurigana
|
||||
|
||||
1. [Download the latest version of JmdictFurigana.txt](https://github.com/Doublevil/JmdictFurigana/releases)
|
||||
2. Ensure it's named JmdictFurigana.txt.
|
92
dict/main.go
92
dict/main.go
|
@ -1,12 +1,14 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/gob"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"foosoft.net/projects/jmdict"
|
||||
|
@ -41,9 +43,30 @@ func LoadDict() error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
const jmdictFuriganaFile = "JmdictFurigana.txt"
|
||||
reader, err = os.Open(jmdictFuriganaFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
scanner := bufio.NewScanner(reader)
|
||||
furiganaData := make(map[string]string)
|
||||
for scanner.Scan() {
|
||||
params := strings.Split(scanner.Text(), "|")
|
||||
furiganaData[params[0]] = params[2]
|
||||
}
|
||||
|
||||
dict = make(map[string]Entry)
|
||||
for _, jmdictEntry := range jmdict.Entries {
|
||||
entry := ParseEntry(&jmdictEntry)
|
||||
// お願い致します|おねがいいたします|1:ねが;3:いた
|
||||
var furiganaInfo *string
|
||||
if len(jmdictEntry.Kanji) > 0 {
|
||||
data := furiganaData[jmdictEntry.Kanji[0].Expression]
|
||||
furiganaInfo = &data
|
||||
} else {
|
||||
furiganaInfo = nil
|
||||
}
|
||||
entry := ParseEntry(&jmdictEntry, furiganaInfo)
|
||||
dict[entry.Kanji] = entry
|
||||
}
|
||||
|
||||
|
@ -62,8 +85,15 @@ func LoadDict() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// Furigana is one display segment of an entry's headword: a run of text paired
// with the reading to show above it. ParseEntry builds a slice of these, and the
// templates render a <ruby> element only for segments whose Furigana is non-empty.
type Furigana struct {
|
||||
// Kanji is the text of this segment. For annotated segments it is a run of
// characters sliced (by rune index) out of the entry's kanji expression; for
// unannotated gaps it is the plain text between annotated runs. When the entry
// has no kanji or no furigana data, ParseEntry stores the whole reading here.
Kanji string
|
||||
// Furigana is the reading for Kanji, or "" when the segment needs no annotation.
Furigana string
|
||||
}
|
||||
|
||||
// Entry is a single dictionary entry as returned by ParseEntry. LoadDict stores
// entries in the dict map keyed by their Kanji field.
type Entry struct {
|
||||
// Kanji is the entry's first kanji expression, or "" when the entry has none.
Kanji string
|
||||
Kanji string
|
||||
// Furigana is the headword split into ordered segments, each pairing a run of
// text with its reading (the reading is empty for segments with no annotation).
// It is a list of segments, not a map from character index to reading.
|
||||
Furigana []Furigana
|
||||
// Reading is set by ParseEntry from a value computed outside this view;
// presumably the entry's kana reading — TODO confirm.
Reading string
|
||||
// Definitions holds the entry's definitions, built by ParseEntry from the
// JMdict senses (each carries that sense's parts of speech).
Definitions []Definition
|
||||
}
|
||||
|
@ -73,7 +103,7 @@ type Definition struct {
|
|||
PartOfSpeech []string
|
||||
}
|
||||
|
||||
func ParseEntry(entry *jmdict.JmdictEntry) Entry {
|
||||
func ParseEntry(entry *jmdict.JmdictEntry, furiganaInfo *string) Entry {
|
||||
kanji := ""
|
||||
if len(entry.Kanji) > 0 {
|
||||
kanji = entry.Kanji[0].Expression
|
||||
|
@ -96,8 +126,64 @@ func ParseEntry(entry *jmdict.JmdictEntry) Entry {
|
|||
PartOfSpeech: sense.PartsOfSpeech,
|
||||
}
|
||||
}
|
||||
// 1:ねが;3:いた
|
||||
var furiganaList []Furigana
|
||||
if kanji == "" || *furiganaInfo == "" {
|
||||
furiganaList = []Furigana{{Kanji: reading, Furigana: ""}}
|
||||
} else {
|
||||
furiganaEntries := strings.Split(*furiganaInfo, ";")
|
||||
// ["1:ねが", "3:いた"]
|
||||
type rawFurigana struct {
|
||||
from int
|
||||
to int
|
||||
furigana string
|
||||
}
|
||||
ruby := make([]rawFurigana, 0)
|
||||
for _, entry := range furiganaEntries {
|
||||
// 1:ねが
|
||||
// multiple: 0-1:きょう
|
||||
params := strings.Split(entry, ":")
|
||||
// ["1", "ねが"]
|
||||
// multiple: ["0-1", "きょう"]
|
||||
indexRange := strings.Split(params[0], "-")
|
||||
// [1]
|
||||
// multiple: [0, 1]
|
||||
var from, to int
|
||||
if len(indexRange) == 1 {
|
||||
index, _ := strconv.Atoi(indexRange[0])
|
||||
from, to = index, index
|
||||
} else {
|
||||
from, _ = strconv.Atoi(indexRange[0])
|
||||
to, _ = strconv.Atoi(indexRange[1])
|
||||
}
|
||||
ruby = append(ruby, rawFurigana{
|
||||
from: from,
|
||||
to: to,
|
||||
furigana: params[1],
|
||||
})
|
||||
}
|
||||
furiganaList = make([]Furigana, 0)
|
||||
slice := func(from, to int) string {
|
||||
return string([]rune(kanji)[from : to+1])
|
||||
}
|
||||
nextIndex := 0
|
||||
for _, raw := range ruby {
|
||||
if raw.from > nextIndex {
|
||||
furiganaList = append(furiganaList, Furigana{
|
||||
Kanji: slice(nextIndex, raw.from-1),
|
||||
Furigana: "",
|
||||
})
|
||||
}
|
||||
furiganaList = append(furiganaList, Furigana{
|
||||
Kanji: slice(raw.from, raw.to),
|
||||
Furigana: raw.furigana,
|
||||
})
|
||||
nextIndex = raw.to + 1
|
||||
}
|
||||
}
|
||||
return Entry{
|
||||
Kanji: kanji,
|
||||
Furigana: furiganaList,
|
||||
Reading: reading,
|
||||
Definitions: definitions,
|
||||
}
|
||||
|
|
|
@ -1,11 +1,15 @@
|
|||
{{ define "entry" }}
|
||||
<div class="box">
|
||||
<h3>
|
||||
{{- if .Kanji -}}
|
||||
<a href="/word/{{ .Kanji }}"><ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Reading -}}</rt><rp>)</rp></ruby></a>
|
||||
{{- else -}}
|
||||
{{- .Reading -}}
|
||||
<h3 lang="ja">
|
||||
<a href="/word/{{ .Kanji }}">
|
||||
{{- range .Furigana -}}
|
||||
{{- if .Furigana -}}
|
||||
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Furigana -}}</rt><rp>)</rp></ruby>
|
||||
{{- else -}}
|
||||
{{- .Kanji -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
</a>
|
||||
</h3>
|
||||
{{- $count := len .Definitions -}}
|
||||
{{ if eq $count 1 -}}
|
||||
|
|
|
@ -1,11 +1,15 @@
|
|||
{{ define "entryfull" }}
|
||||
<div class="box">
|
||||
<h3>
|
||||
{{- if .Kanji -}}
|
||||
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Reading -}}</rt><rp>)</rp></ruby>
|
||||
{{- else -}}
|
||||
{{- .Reading -}}
|
||||
<h3 lang="ja">
|
||||
<a href="/word/{{ .Kanji }}">
|
||||
{{- range .Furigana -}}
|
||||
{{- if .Furigana -}}
|
||||
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Furigana -}}</rt><rp>)</rp></ruby>
|
||||
{{- else -}}
|
||||
{{- .Kanji -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
</a>
|
||||
</h3>
|
||||
{{- $count := len .Definitions -}}
|
||||
{{ if eq $count 1 -}}
|
||||
|
|
Reference in a new issue