Want to contribute? Fork me on Codeberg.org!
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

351 lines
8.1 KiB

package main
import (
"bufio"
"encoding/gob"
"encoding/json"
"fmt"
"log"
"net/http"
"os"
"strconv"
"strings"
"foosoft.net/projects/jmdict"
"git.elnu.com/ElnuDev/jichanorg/httputils"
"github.com/gorilla/mux"
)
// dict is the in-memory dictionary, keyed by each entry's Kanji field.
// LoadDict fills it; Search and Lookup read from it.
var dict map[string]Entry
func LoadDict() error {
// Loading from binary
const binaryFile = "dict.bin"
file, err := os.Open(binaryFile)
if err != nil {
if !os.IsNotExist(err) {
return err
}
} else {
defer file.Close()
decoder := gob.NewDecoder(file)
err = decoder.Decode(&dict)
return err
}
// Loading from JMdict
const jmdictFile = "JMdict.xml"
reader, err := os.Open(jmdictFile)
if err != nil {
return err
}
jmdict, _, err := jmdict.LoadJmdict(reader)
if err != nil {
return err
}
const jmdictFuriganaFile = "JmdictFurigana.txt"
reader, err = os.Open(jmdictFuriganaFile)
if err != nil {
return err
}
scanner := bufio.NewScanner(reader)
furiganaData := make(map[string]string)
for scanner.Scan() {
params := strings.Split(scanner.Text(), "|")
furiganaData[params[0]] = params[2]
}
dict = make(map[string]Entry)
for _, jmdictEntry := range jmdict.Entries {
// お願い致します|おねがいいたします|1:ねが;3:いた
var furiganaInfo *string
if len(jmdictEntry.Kanji) > 0 {
data := furiganaData[jmdictEntry.Kanji[0].Expression]
furiganaInfo = &data
} else {
furiganaInfo = nil
}
entry := ParseEntry(&jmdictEntry, furiganaInfo)
dict[entry.Kanji] = entry
}
// Encoding to binary
file, err = os.Create(binaryFile)
if err != nil {
return err
}
defer file.Close()
encoder := gob.NewEncoder(file)
err = encoder.Encode(&dict)
if err != nil {
return err
}
return nil
}
// Furigana is one consecutive segment of an entry's displayed text together
// with the furigana annotation for that segment.
type Furigana struct {
// Kanji is the text of the segment.
Kanji string
// Furigana is the annotation for the segment. ParseEntry leaves it empty
// for segments that carry no annotation.
Furigana string
}
// Entry is a single dictionary entry as built by ParseEntry. It is the value
// type of dict and is what Search and Lookup hand back.
type Entry struct {
// Kanji is the headword: the first kanji expression of the JMdict entry,
// or its first reading when the entry has no kanji. It is also the dict key.
Kanji string
// Furigana is the list of segments, in order, each paired with its
// furigana (empty where a segment has none).
Furigana []Furigana
// Reading is the first reading of the JMdict entry.
Reading string
// Definitions holds one element per JMdict sense, in sense order.
Definitions []Definition
}
// Definition is one sense of an entry.
type Definition struct {
// Definition is the sense's glossary contents joined with "; ".
Definition string
// PartOfSpeech is copied from the sense's PartsOfSpeech.
PartOfSpeech []string
}
// ParseEntry converts a JMdict entry into an Entry.
//
// The headword is the first kanji expression, or the first reading when the
// entry has no kanji. Each sense becomes one Definition whose text is the
// sense's glosses joined with "; ".
//
// furiganaInfo is the furigana column of a JmdictFurigana line, e.g.
// "1:ねが;3:いた" or "0-1:きょう": semicolon-separated items, each a zero-based
// character index (or inclusive from-to range) into the headword, a colon,
// and the furigana for that span. When furiganaInfo is nil, empty, or cannot
// be applied to the headword, Entry.Furigana is a single segment holding the
// reading with no annotation.
func ParseEntry(entry *jmdict.JmdictEntry, furiganaInfo *string) Entry {
	var kanji string
	switch {
	case len(entry.Kanji) > 0:
		kanji = entry.Kanji[0].Expression
	case len(entry.Readings) > 0:
		kanji = entry.Readings[0].Reading
	}

	var reading string
	if kanji != "" && len(entry.Readings) > 0 {
		reading = entry.Readings[0].Reading
	}

	definitions := make([]Definition, len(entry.Sense))
	for i := range entry.Sense {
		sense := &entry.Sense[i]
		glosses := make([]string, len(sense.Glossary))
		for j := range sense.Glossary {
			glosses[j] = sense.Glossary[j].Content
		}
		definitions[i] = Definition{
			Definition:   strings.Join(glosses, "; "),
			PartOfSpeech: sense.PartsOfSpeech,
		}
	}

	// Default: one unannotated segment. Replaced below only when the furigana
	// description parses cleanly against the headword.
	furiganaList := []Furigana{{Kanji: reading, Furigana: ""}}
	if reading != "" && furiganaInfo != nil && *furiganaInfo != "" {
		if segments, ok := splitFurigana(kanji, *furiganaInfo); ok {
			furiganaList = segments
		}
	}

	return Entry{
		Kanji:       kanji,
		Furigana:    furiganaList,
		Reading:     reading,
		Definitions: definitions,
	}
}

// splitFurigana cuts kanji into consecutive segments according to info
// ("1:ねが;3:いた"), filling the gaps between annotated spans with unannotated
// segments. It reports false when info is malformed or its indices do not fit
// kanji, instead of slicing out of range.
func splitFurigana(kanji, info string) ([]Furigana, bool) {
	runes := []rune(kanji) // indices in info count characters, not bytes
	var segments []Furigana
	nextIndex := 0
	for _, item := range strings.Split(info, ";") {
		// "1:ねが" or, for a multi-character span, "0-1:きょう"
		span, furigana, found := strings.Cut(item, ":")
		if !found {
			return nil, false
		}
		fromText, toText, isRange := strings.Cut(span, "-")
		from, err := strconv.Atoi(fromText)
		if err != nil {
			return nil, false
		}
		to := from
		if isRange {
			if to, err = strconv.Atoi(toText); err != nil {
				return nil, false
			}
		}
		// Spans must move forward and stay inside the headword.
		if from < nextIndex || to < from || to >= len(runes) {
			return nil, false
		}
		if from > nextIndex {
			segments = append(segments, Furigana{Kanji: string(runes[nextIndex:from])})
		}
		segments = append(segments, Furigana{
			Kanji:    string(runes[from : to+1]),
			Furigana: furigana,
		})
		nextIndex = to + 1
	}
	if nextIndex < len(runes) {
		segments = append(segments, Furigana{Kanji: string(runes[nextIndex:])})
	}
	return segments, true
}
// Search returns the entries of dict that match query.
//
// query is trimmed of surrounding whitespace first. An entry whose key equals
// the query goes to ExactResults; entries whose key or Reading merely contain
// the query go to OtherResults. At most 500 entries are returned in total and
// Truncated is set once that limit is reached. Because dict is a map, the
// order of OtherResults (and which partial matches survive truncation) is
// unspecified. A query that is empty after trimming yields no results.
func Search(query string) queryResult {
	const maxResults = 500

	query = strings.TrimSpace(query)
	exactResults := make([]Entry, 0)
	otherResults := make([]Entry, 0)
	truncated := false

	// Every string contains "", so an empty query would otherwise return an
	// arbitrary 500 entries.
	if query != "" {
		// Resolve the exact match by key up front: relying on the scan below
		// could drop it whenever the result limit is hit first.
		if entry, ok := dict[query]; ok {
			exactResults = append(exactResults, entry)
		}
		for kanji, entry := range dict {
			if kanji == query {
				continue // already recorded as the exact match
			}
			// TODO: Skip if query contains kanji
			if !strings.Contains(kanji, query) && !strings.Contains(entry.Reading, query) {
				continue
			}
			otherResults = append(otherResults, entry)
			if len(exactResults)+len(otherResults) >= maxResults {
				truncated = true
				break
			}
		}
	}

	return queryResult{
		Query:        query,
		ExactResults: exactResults,
		OtherResults: otherResults,
		Truncated:    truncated,
		Count:        len(exactResults) + len(otherResults),
	}
}
// Lookup returns a pointer to a copy of the dict entry stored under word, or
// nil when there is no such entry. Changes made through the returned pointer
// do not affect dict.
func Lookup(word string) *Entry {
	// dict is keyed by the headword, so index it directly instead of scanning.
	entry, ok := dict[word]
	if !ok {
		return nil
	}
	return &entry
}
// queryResult is what Search returns; the search route passes it to its
// template as the template data.
type queryResult struct {
// Fields must be capitalized
// to be accessible in templates

// Query is the search string after surrounding whitespace was trimmed.
Query string
// ExactResults holds entries whose dict key equals Query.
ExactResults []Entry
// OtherResults holds entries whose key or reading contains Query.
OtherResults []Entry
// Truncated is set when Search stopped at its result limit.
Truncated bool
// Count is len(ExactResults) + len(OtherResults).
Count int
}
// main loads the dictionary, registers the HTTP routes and serves on :3334.
//
// Routes:
//
//	/                 landing page
//	/search, /search/ read form value "q" and redirect to /search/{query}
//	/search/{query}   search results as HTML, or JSON for Accept: application/json
//	/word, /word/     redirect to /
//	/word/{word}      page for a single entry
//	anything else     file from ./static
func main() {
	if err := LoadDict(); err != nil {
		// Exit non-zero so a failed start-up is visible to whatever launched us.
		log.Fatal(err)
	}
	fmt.Println("JMdict loaded!")

	r := mux.NewRouter()

	r.HandleFunc("/", httputils.GenerateHandler(
		func(w http.ResponseWriter, r *http.Request) bool { return true },
		httputils.NewTemplateSet("index.html"),
		func(w http.ResponseWriter, r *http.Request) (string, any) { return "index.html", nil },
		[]string{http.MethodGet},
	))

	rawSearchHandler := func(w http.ResponseWriter, r *http.Request) {
		// The error is deliberately ignored: a request that is not multipart
		// reports one here, yet FormValue below still sees URL and
		// urlencoded-body parameters.
		_ = r.ParseMultipartForm(0)
		q := r.FormValue("q")
		redirect := "/"
		if q != "" {
			// Percent-encode the query so characters such as '%', '?', '#'
			// and spaces survive as part of the path. Copying r.URL yields a
			// URL value whose EscapedPath does the encoding; RawPath is
			// cleared so the encoding is derived from Path.
			target := *r.URL
			target.Path, target.RawPath = "/search/"+q, ""
			redirect = target.EscapedPath()
		}
		http.Redirect(w, r, redirect, http.StatusMovedPermanently)
	}
	r.HandleFunc("/search", rawSearchHandler)
	r.HandleFunc("/search/", rawSearchHandler)

	r.HandleFunc("/search/{query}", httputils.GenerateHandler(
		// handler whether or not to use template
		func(w http.ResponseWriter, r *http.Request) bool {
			// Unless the client asked for exactly "Accept: application/json",
			// render the template.
			if r.Header.Get("Accept") != "application/json" {
				return true
			}
			// Otherwise, let's send JSON
			result := Search(mux.Vars(r)["query"])
			jsonBytes, err := json.Marshal(append(result.ExactResults, result.OtherResults...))
			if err != nil {
				http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
				return false
			}
			w.Header().Set("Content-Type", "application/json; charset=utf-8")
			if _, err := w.Write(jsonBytes); err != nil {
				log.Println(err)
			}
			return false
		},
		httputils.NewTemplateSet("index.html", "search.html"),
		// template data
		// Only runs if handler returns true
		func(w http.ResponseWriter, r *http.Request) (string, any) {
			// Requests carrying an HX-Request header get the "search"
			// template, all others get "search.html".
			name := "search.html"
			if r.Header.Get("HX-Request") != "" {
				name = "search"
			}
			return name, Search(mux.Vars(r)["query"])
		},
		[]string{http.MethodGet},
	))

	rawWordHandler := func(w http.ResponseWriter, r *http.Request) {
		fmt.Println("Redirecting raw word handler")
		http.Redirect(w, r, "/", http.StatusMovedPermanently)
	}
	r.HandleFunc("/word", rawWordHandler)
	r.HandleFunc("/word/", rawWordHandler)

	r.HandleFunc("/word/{word}", httputils.GenerateHandler(
		func(w http.ResponseWriter, r *http.Request) bool { return true },
		// Order matters
		// word.html overrides the results block in index.html
		// so should be loaded second
		httputils.NewTemplateSet("index.html", "word.html"),
		func(w http.ResponseWriter, r *http.Request) (string, any) {
			data := struct {
				Query any
				Entry *Entry
			}{
				Query: nil,
				Entry: Lookup(mux.Vars(r)["word"]),
			}
			return "word.html", data
		},
		[]string{http.MethodGet},
	))

	// Serve everything not matched above from ./static. A plain Handle("/")
	// only matches the exact path "/", which the landing page route already
	// claims, so a path-prefix route is needed. mux tries routes in
	// registration order, so this catch-all has to stay last.
	r.PathPrefix("/").Handler(http.FileServer(http.Dir("static")))

	log.Fatal(http.ListenAndServe(":3334", r))
}