@ -1,12 +1,14 @@
package main
package main
import (
import (
"bufio"
"encoding/gob"
"encoding/gob"
"encoding/json"
"encoding/json"
"fmt"
"fmt"
"log"
"log"
"net/http"
"net/http"
"os"
"os"
"strconv"
"strings"
"strings"
"foosoft.net/projects/jmdict"
"foosoft.net/projects/jmdict"
@ -41,9 +43,30 @@ func LoadDict() error {
if err != nil {
if err != nil {
return err
return err
}
}
const jmdictFuriganaFile = "JmdictFurigana.txt"
reader , err = os . Open ( jmdictFuriganaFile )
if err != nil {
return err
}
scanner := bufio . NewScanner ( reader )
furiganaData := make ( map [ string ] string )
for scanner . Scan ( ) {
params := strings . Split ( scanner . Text ( ) , "|" )
furiganaData [ params [ 0 ] ] = params [ 2 ]
}
dict = make ( map [ string ] Entry )
dict = make ( map [ string ] Entry )
for _ , jmdictEntry := range jmdict . Entries {
for _ , jmdictEntry := range jmdict . Entries {
entry := ParseEntry ( & jmdictEntry )
// お願い致します|おねがいいたします|1:ねが;3:いた
var furiganaInfo * string
if len ( jmdictEntry . Kanji ) > 0 {
data := furiganaData [ jmdictEntry . Kanji [ 0 ] . Expression ]
furiganaInfo = & data
} else {
furiganaInfo = nil
}
entry := ParseEntry ( & jmdictEntry , furiganaInfo )
dict [ entry . Kanji ] = entry
dict [ entry . Kanji ] = entry
}
}
@ -62,8 +85,15 @@ func LoadDict() error {
return nil
return nil
}
}
// Furigana is one display segment of an entry's headword: a run of text and
// the reading to show alongside it.
//
// Kanji holds the segment's text. Furigana holds its reading and is left
// empty for segments that carry none. When an entry has no kanji form or no
// furigana data, ParseEntry emits a single segment whose Kanji field is the
// entry's reading.
type Furigana struct {
	Kanji, Furigana string
}
// Entry is a single dictionary entry as returned by ParseEntry.
type Entry struct {
type Entry struct {
// Kanji is the first kanji expression of the source JMdict entry, or "" when
// the entry has no kanji form.
Kanji string
Kanji string
// Furigana is the ordered list of display segments built by ParseEntry, each
// pairing a run of text with its reading (empty when the segment has none).
Furigana [ ] Furigana
Reading string
Reading string
Definitions [ ] Definition
Definitions [ ] Definition
}
}
@ -73,7 +103,7 @@ type Definition struct {
PartOfSpeech [ ] string
PartOfSpeech [ ] string
}
}
func ParseEntry ( entry * jmdict . JmdictEntry ) Entry {
func ParseEntry ( entry * jmdict . JmdictEntry , furiganaInfo * string ) Entry {
kanji := ""
kanji := ""
if len ( entry . Kanji ) > 0 {
if len ( entry . Kanji ) > 0 {
kanji = entry . Kanji [ 0 ] . Expression
kanji = entry . Kanji [ 0 ] . Expression
@ -96,8 +126,64 @@ func ParseEntry(entry *jmdict.JmdictEntry) Entry {
PartOfSpeech : sense . PartsOfSpeech ,
PartOfSpeech : sense . PartsOfSpeech ,
}
}
}
}
// 1:ねが;3:いた
var furiganaList [ ] Furigana
if kanji == "" || * furiganaInfo == "" {
furiganaList = [ ] Furigana { { Kanji : reading , Furigana : "" } }
} else {
furiganaEntries := strings . Split ( * furiganaInfo , ";" )
// ["1:ねが", "3:いた"]
type rawFurigana struct {
from int
to int
furigana string
}
ruby := make ( [ ] rawFurigana , 0 )
for _ , entry := range furiganaEntries {
// 1:ねが
// multiple: 0-1:きょう
params := strings . Split ( entry , ":" )
// ["1", "ねが"]
// multiple: ["0-1", "きょう"]
indexRange := strings . Split ( params [ 0 ] , "-" )
// [1]
// multiple: [0, 1]
var from , to int
if len ( indexRange ) == 1 {
index , _ := strconv . Atoi ( indexRange [ 0 ] )
from , to = index , index
} else {
from , _ = strconv . Atoi ( indexRange [ 0 ] )
to , _ = strconv . Atoi ( indexRange [ 1 ] )
}
ruby = append ( ruby , rawFurigana {
from : from ,
to : to ,
furigana : params [ 1 ] ,
} )
}
furiganaList = make ( [ ] Furigana , 0 )
slice := func ( from , to int ) string {
return string ( [ ] rune ( kanji ) [ from : to + 1 ] )
}
nextIndex := 0
for _ , raw := range ruby {
if raw . from > nextIndex {
furiganaList = append ( furiganaList , Furigana {
Kanji : slice ( nextIndex , raw . from - 1 ) ,
Furigana : "" ,
} )
}
furiganaList = append ( furiganaList , Furigana {
Kanji : slice ( raw . from , raw . to ) ,
Furigana : raw . furigana ,
} )
nextIndex = raw . to + 1
}
}
return Entry {
return Entry {
Kanji : kanji ,
Kanji : kanji ,
Furigana : furiganaList ,
Reading : reading ,
Reading : reading ,
Definitions : definitions ,
Definitions : definitions ,
}
}