generated from ElnuDev/go-project
Add proper furigana
This commit is contained in:
parent
91fb5d9e3b
commit
2b85099901
5 changed files with 116 additions and 14 deletions
1
dict/.gitignore
vendored
1
dict/.gitignore
vendored
|
@ -1,2 +1,3 @@
|
||||||
JMdict.xml
|
JMdict.xml
|
||||||
|
JmdictFurigana.txt
|
||||||
dict.bin
|
dict.bin
|
|
@ -9,6 +9,13 @@ Its primary goals are:
|
||||||
|
|
||||||
### Configuration
|
### Configuration
|
||||||
|
|
||||||
|
#### JMdict
|
||||||
|
|
||||||
1. [Download the latest version of JMdict](https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project#CURRENT_VERSION_&_DOWNLOAD). For development, download the file with only English glosses, which will substantially decrease parsing time.
|
1. [Download the latest version of JMdict](https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project#CURRENT_VERSION_&_DOWNLOAD). For development, download the file with only English glosses, which will substantially decrease parsing time.
|
||||||
2. Extract the archive
|
2. Extract the archive
|
||||||
3. Rename the file to JMdict.xml
|
3. Rename the file to JMdict.xml
|
||||||
|
|
||||||
|
#### JmdictFurigana
|
||||||
|
|
||||||
|
1. [Download the latest version of JmdictFurigana.txt](https://github.com/Doublevil/JmdictFurigana/releases)
|
||||||
|
2. Ensure it's named JmdictFurigana.txt.
|
90
dict/main.go
90
dict/main.go
|
@ -1,12 +1,14 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
"encoding/gob"
|
"encoding/gob"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"foosoft.net/projects/jmdict"
|
"foosoft.net/projects/jmdict"
|
||||||
|
@ -41,9 +43,30 @@ func LoadDict() error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const jmdictFuriganaFile = "JmdictFurigana.txt"
|
||||||
|
reader, err = os.Open(jmdictFuriganaFile)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
scanner := bufio.NewScanner(reader)
|
||||||
|
furiganaData := make(map[string]string)
|
||||||
|
for scanner.Scan() {
|
||||||
|
params := strings.Split(scanner.Text(), "|")
|
||||||
|
furiganaData[params[0]] = params[2]
|
||||||
|
}
|
||||||
|
|
||||||
dict = make(map[string]Entry)
|
dict = make(map[string]Entry)
|
||||||
for _, jmdictEntry := range jmdict.Entries {
|
for _, jmdictEntry := range jmdict.Entries {
|
||||||
entry := ParseEntry(&jmdictEntry)
|
// お願い致します|おねがいいたします|1:ねが;3:いた
|
||||||
|
var furiganaInfo *string
|
||||||
|
if len(jmdictEntry.Kanji) > 0 {
|
||||||
|
data := furiganaData[jmdictEntry.Kanji[0].Expression]
|
||||||
|
furiganaInfo = &data
|
||||||
|
} else {
|
||||||
|
furiganaInfo = nil
|
||||||
|
}
|
||||||
|
entry := ParseEntry(&jmdictEntry, furiganaInfo)
|
||||||
dict[entry.Kanji] = entry
|
dict[entry.Kanji] = entry
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -62,8 +85,15 @@ func LoadDict() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Furigana is one display segment of an entry's headword: a run of text
// paired with the reading used to annotate it.
type Furigana struct {
|
||||||
|
// Kanji is the segment's base text. ParseEntry fills it with a slice of
// the kanji expression, or with the whole reading when the entry has no
// kanji or no furigana data.
Kanji string
|
||||||
|
// Furigana is the reading to show for Kanji; it is empty for segments
// that need no annotation.
Furigana string
|
||||||
|
}
|
||||||
|
|
||||||
type Entry struct {
|
type Entry struct {
|
||||||
Kanji string
|
Kanji string
|
||||||
|
// Furigana is the headword split into segments, each paired with its
// reading (empty where no annotation is needed). The templates range over
// it and emit a <ruby> element only for segments with a non-empty reading.
// NOTE(review): the visible ParseEntry loop never appends the text that
// follows the last annotated segment, so trailing kana (e.g. the "します"
// of "お願い致します") appear to be missing from this list — confirm.
|
||||||
|
Furigana []Furigana
|
||||||
Reading string
|
Reading string
|
||||||
Definitions []Definition
|
Definitions []Definition
|
||||||
}
|
}
|
||||||
|
@ -73,7 +103,7 @@ type Definition struct {
|
||||||
PartOfSpeech []string
|
PartOfSpeech []string
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseEntry(entry *jmdict.JmdictEntry) Entry {
|
func ParseEntry(entry *jmdict.JmdictEntry, furiganaInfo *string) Entry {
|
||||||
kanji := ""
|
kanji := ""
|
||||||
if len(entry.Kanji) > 0 {
|
if len(entry.Kanji) > 0 {
|
||||||
kanji = entry.Kanji[0].Expression
|
kanji = entry.Kanji[0].Expression
|
||||||
|
@ -96,8 +126,64 @@ func ParseEntry(entry *jmdict.JmdictEntry) Entry {
|
||||||
PartOfSpeech: sense.PartsOfSpeech,
|
PartOfSpeech: sense.PartsOfSpeech,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// 1:ねが;3:いた
|
||||||
|
var furiganaList []Furigana
|
||||||
|
if kanji == "" || *furiganaInfo == "" {
|
||||||
|
furiganaList = []Furigana{{Kanji: reading, Furigana: ""}}
|
||||||
|
} else {
|
||||||
|
furiganaEntries := strings.Split(*furiganaInfo, ";")
|
||||||
|
// ["1:ねが", "3:いた"]
|
||||||
|
type rawFurigana struct {
|
||||||
|
from int
|
||||||
|
to int
|
||||||
|
furigana string
|
||||||
|
}
|
||||||
|
ruby := make([]rawFurigana, 0)
|
||||||
|
for _, entry := range furiganaEntries {
|
||||||
|
// 1:ねが
|
||||||
|
// multiple: 0-1:きょう
|
||||||
|
params := strings.Split(entry, ":")
|
||||||
|
// ["1", "ねが"]
|
||||||
|
// multiple: ["0-1", "きょう"]
|
||||||
|
indexRange := strings.Split(params[0], "-")
|
||||||
|
// [1]
|
||||||
|
// multiple: [0, 1]
|
||||||
|
var from, to int
|
||||||
|
if len(indexRange) == 1 {
|
||||||
|
index, _ := strconv.Atoi(indexRange[0])
|
||||||
|
from, to = index, index
|
||||||
|
} else {
|
||||||
|
from, _ = strconv.Atoi(indexRange[0])
|
||||||
|
to, _ = strconv.Atoi(indexRange[1])
|
||||||
|
}
|
||||||
|
ruby = append(ruby, rawFurigana{
|
||||||
|
from: from,
|
||||||
|
to: to,
|
||||||
|
furigana: params[1],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
furiganaList = make([]Furigana, 0)
|
||||||
|
slice := func(from, to int) string {
|
||||||
|
return string([]rune(kanji)[from : to+1])
|
||||||
|
}
|
||||||
|
nextIndex := 0
|
||||||
|
for _, raw := range ruby {
|
||||||
|
if raw.from > nextIndex {
|
||||||
|
furiganaList = append(furiganaList, Furigana{
|
||||||
|
Kanji: slice(nextIndex, raw.from-1),
|
||||||
|
Furigana: "",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
furiganaList = append(furiganaList, Furigana{
|
||||||
|
Kanji: slice(raw.from, raw.to),
|
||||||
|
Furigana: raw.furigana,
|
||||||
|
})
|
||||||
|
nextIndex = raw.to + 1
|
||||||
|
}
|
||||||
|
}
|
||||||
return Entry{
|
return Entry{
|
||||||
Kanji: kanji,
|
Kanji: kanji,
|
||||||
|
Furigana: furiganaList,
|
||||||
Reading: reading,
|
Reading: reading,
|
||||||
Definitions: definitions,
|
Definitions: definitions,
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,11 +1,15 @@
|
||||||
{{ define "entry" }}
|
{{ define "entry" }}
|
||||||
<div class="box">
|
<div class="box">
|
||||||
<h3>
|
<h3 lang="ja">
|
||||||
{{- if .Kanji -}}
|
<a href="/word/{{ .Kanji }}">
|
||||||
<a href="/word/{{ .Kanji }}"><ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Reading -}}</rt><rp>)</rp></ruby></a>
|
{{- range .Furigana -}}
|
||||||
|
{{- if .Furigana -}}
|
||||||
|
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Furigana -}}</rt><rp>)</rp></ruby>
|
||||||
{{- else -}}
|
{{- else -}}
|
||||||
{{- .Reading -}}
|
{{- .Kanji -}}
|
||||||
{{- end -}}
|
{{- end -}}
|
||||||
|
{{- end -}}
|
||||||
|
</a>
|
||||||
</h3>
|
</h3>
|
||||||
{{- $count := len .Definitions -}}
|
{{- $count := len .Definitions -}}
|
||||||
{{ if eq $count 1 -}}
|
{{ if eq $count 1 -}}
|
||||||
|
|
|
@ -1,11 +1,15 @@
|
||||||
{{ define "entryfull" }}
|
{{ define "entryfull" }}
|
||||||
<div class="box">
|
<div class="box">
|
||||||
<h3>
|
<h3 lang="ja">
|
||||||
{{- if .Kanji -}}
|
<a href="/word/{{ .Kanji }}">
|
||||||
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Reading -}}</rt><rp>)</rp></ruby>
|
{{- range .Furigana -}}
|
||||||
|
{{- if .Furigana -}}
|
||||||
|
<ruby>{{- .Kanji -}}<rp>(</rp><rt>{{- .Furigana -}}</rt><rp>)</rp></ruby>
|
||||||
{{- else -}}
|
{{- else -}}
|
||||||
{{- .Reading -}}
|
{{- .Kanji -}}
|
||||||
{{- end -}}
|
{{- end -}}
|
||||||
|
{{- end -}}
|
||||||
|
</a>
|
||||||
</h3>
|
</h3>
|
||||||
{{- $count := len .Definitions -}}
|
{{- $count := len .Definitions -}}
|
||||||
{{ if eq $count 1 -}}
|
{{ if eq $count 1 -}}
|
||||||
|
|
Reference in a new issue