diff --git a/dict/.gitignore b/dict/.gitignore index 18e00d2..1a06b16 100644 --- a/dict/.gitignore +++ b/dict/.gitignore @@ -1,2 +1,3 @@ JMdict.xml +JmdictFurigana.txt dict.bin \ No newline at end of file diff --git a/dict/README.md b/dict/README.md index bb0c063..7518875 100644 --- a/dict/README.md +++ b/dict/README.md @@ -9,6 +9,13 @@ Its primary goals are: ### Configuration +#### JMdict + 1. [Download the latest version of JMdict](https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project#CURRENT_VERSION_&_DOWNLOAD). For development, download the file with only English glosses, which will substantially decrease parsing time. 2. Extract the archive -3. Rename the file to JMDict.xml \ No newline at end of file +3. Rename the file to JMdict.xml + +#### JmdictFurigana + +1. [Download the latest version of JmdictFurigana.txt](https://github.com/Doublevil/JmdictFurigana/releases) +2. Ensure it's named JmdictFurigana.txt. \ No newline at end of file diff --git a/dict/main.go b/dict/main.go index 073ab2e..2261a1c 100644 --- a/dict/main.go +++ b/dict/main.go @@ -1,12 +1,15 @@ package main import ( + "bufio" "encoding/gob" "encoding/json" "fmt" + "html/template" "log" "net/http" "os" + "strconv" "strings" "foosoft.net/projects/jmdict" @@ -14,9 +17,15 @@ import ( "github.com/gorilla/mux" ) +var words []string // since iterating over map isn't the same every time var dict map[string]Entry func LoadDict() error { + type binaryData struct { + Words []string + Dict map[string]Entry + } + // Loading from binary const binaryFile = "dict.bin" file, err := os.Open(binaryFile) @@ -27,7 +36,10 @@ func LoadDict() error { } else { defer file.Close() decoder := gob.NewDecoder(file) - err = decoder.Decode(&dict) + var data binaryData + err = decoder.Decode(&data) + words = data.Words + dict = data.Dict return err } @@ -41,10 +53,52 @@ func LoadDict() error { if err != nil { return err } + + const jmdictFuriganaFile = "JmdictFurigana.txt" + reader, err = 
os.Open(jmdictFuriganaFile) + if err != nil { + return err + } + scanner := bufio.NewScanner(reader) + furiganaData := make(map[string]string) + for scanner.Scan() { + params := strings.Split(scanner.Text(), "|") + // We need to include the reading as well since some words have the same kanji + furiganaData[fmt.Sprintf("%s|%s", params[0], params[1])] = params[2] + } + + words = make([]string, len(jmdict.Entries)) dict = make(map[string]Entry) - for _, jmdictEntry := range jmdict.Entries { - entry := ParseEntry(&jmdictEntry) - dict[entry.Kanji] = entry + for i, jmdictEntry := range jmdict.Entries { + // お願い致します|おねがいいたします|1:ねが;3:いた + var furiganaInfo *string + if len(jmdictEntry.Kanji) > 0 { + data := furiganaData[fmt.Sprintf("%s|%s", jmdictEntry.Kanji[0].Expression, jmdictEntry.Readings[0].Reading)] + furiganaInfo = &data + } else { + furiganaInfo = nil + } + entry := ParseEntry(&jmdictEntry, furiganaInfo) + offset := 0 + getKey := func() string { + if offset == 0 { + // unique + return entry.Kanji + } else { + return fmt.Sprintf("%s-%d", entry.Kanji, offset) + } + } + for { + if _, ok := dict[getKey()]; ok { + offset++ + } else { + break + } + } + key := getKey() + entry.Key = key + words[i] = key + dict[key] = entry } // Encoding to binary @@ -54,7 +108,11 @@ func LoadDict() error { } defer file.Close() encoder := gob.NewEncoder(file) - err = encoder.Encode(&dict) + data := binaryData{ + Words: words, + Dict: dict, + } + err = encoder.Encode(&data) if err != nil { return err } @@ -62,8 +120,16 @@ func LoadDict() error { return nil } +type Furigana struct { + Kanji string + Furigana string +} + type Entry struct { - Kanji string + Key string + Kanji string + // Mapping of character index to furigana + Furigana []Furigana Reading string Definitions []Definition } @@ -73,13 +139,15 @@ type Definition struct { PartOfSpeech []string } -func ParseEntry(entry *jmdict.JmdictEntry) Entry { +func ParseEntry(entry *jmdict.JmdictEntry, furiganaInfo *string) Entry { kanji := 
"" if len(entry.Kanji) > 0 { kanji = entry.Kanji[0].Expression + } else { + kanji = entry.Readings[0].Reading } reading := "" - if len(entry.Readings) > 0 { + if kanji != "" { reading = entry.Readings[0].Reading } var definitions []Definition @@ -96,29 +164,104 @@ func ParseEntry(entry *jmdict.JmdictEntry) Entry { PartOfSpeech: sense.PartsOfSpeech, } } + // 1:ねが;3:いた + var furiganaList []Furigana + if reading == "" || furiganaInfo == nil || *furiganaInfo == "" { + furiganaList = []Furigana{{Kanji: reading, Furigana: ""}} + } else { + furiganaEntries := strings.Split(*furiganaInfo, ";") + // ["1:ねが", "3:いた"] + type rawFurigana struct { + from int + to int + furigana string + } + ruby := make([]rawFurigana, 0) + for _, entry := range furiganaEntries { + // 1:ねが + // multiple: 0-1:きょう + params := strings.Split(entry, ":") + // ["1", "ねが"] + // multiple: ["0-1", "きょう"] + indexRange := strings.Split(params[0], "-") + // [1] + // multiple: [0, 1] + var from, to int + if len(indexRange) == 1 { + index, _ := strconv.Atoi(indexRange[0]) + from, to = index, index + } else { + from, _ = strconv.Atoi(indexRange[0]) + to, _ = strconv.Atoi(indexRange[1]) + } + ruby = append(ruby, rawFurigana{ + from: from, + to: to, + furigana: params[1], + }) + } + furiganaList = make([]Furigana, 0) + slice := func(from, to int) string { + return string([]rune(kanji)[from : to+1]) + } + nextIndex := 0 + for _, raw := range ruby { + if raw.from > nextIndex { + furiganaList = append(furiganaList, Furigana{ + Kanji: slice(nextIndex, raw.from-1), + Furigana: "", + }) + } + furiganaList = append(furiganaList, Furigana{ + Kanji: slice(raw.from, raw.to), + Furigana: raw.furigana, + }) + nextIndex = raw.to + 1 + } + length := len([]rune(kanji)) + if nextIndex < length { + furiganaList = append(furiganaList, Furigana{ + Kanji: slice(nextIndex, length-1), + Furigana: "", + }) + } + } return Entry{ Kanji: kanji, + Furigana: furiganaList, Reading: reading, Definitions: definitions, } } +func 
highlight(input, substring string) template.HTML { + // Replace all occurrences of substring with the highlighted version + replacement := fmt.Sprintf("%s", substring) + result := strings.ReplaceAll(input, substring, replacement) + return template.HTML(result) +} + func Search(query string) queryResult { query = strings.TrimSpace(query) exactResults := make([]Entry, 0) otherResults := make([]Entry, 0) truncated := false count := 0 - for kanji := range dict { + for _, key := range words { exactMatch := false - entry := dict[kanji] - if kanji == query { + entry := dict[key] + if entry.Kanji == query { exactMatch = true goto match } - if strings.Contains(kanji, query) { + if strings.Contains(entry.Kanji, query) { goto match } + for _, definition := range entry.Definitions { + if strings.Contains(strings.ToLower(definition.Definition), strings.ToLower(strings.TrimSpace(query))) { + goto match + } + } // TODO: Skip if query contains kanji if strings.Contains(entry.Reading, query) { goto match @@ -171,6 +314,12 @@ func main() { return } fmt.Println("JMdict loaded!") + httputils.DefaultTemplateFuncs = template.FuncMap{ + "highlight": func(input string) string { + return input + }, + } + httputils.TemplateFuncs = httputils.DefaultTemplateFuncs r := mux.NewRouter() r.HandleFunc("/", httputils.GenerateHandler( func(w http.ResponseWriter, r *http.Request) bool { return true }, @@ -211,15 +360,20 @@ func main() { }, httputils.NewTemplateSet("index.html", "search.html"), // template data - func(w http.ResponseWriter, r *http.Request) (template string, data any) { + func(w http.ResponseWriter, r *http.Request) (templateName string, data any) { if r.Header.Get("HX-Request") == "" { - template = "search.html" + templateName = "search.html" } else { - template = "search" + templateName = "search" } // Only runs if handler returns true query := mux.Vars(r)["query"] data = Search(query) + httputils.TemplateFuncs = template.FuncMap{ + "highlight": func(input string) template.HTML { + 
return highlight(input, strings.TrimSpace(query)) + }, + } return }, []string{http.MethodGet}, @@ -250,6 +404,6 @@ func main() { }, []string{http.MethodGet}, )) - r.Handle("/", http.FileServer(http.Dir("static"))) + r.PathPrefix("/").Handler(http.FileServer(http.Dir("static"))) log.Fatal(http.ListenAndServe(":3334", r)) } diff --git a/dict/static/logo.svg b/dict/static/logo.svg new file mode 100644 index 0000000..9e412b3 --- /dev/null +++ b/dict/static/logo.svg @@ -0,0 +1,90 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/dict/templates/index.html b/dict/templates/index.html index ea33ea8..e16e3da 100644 --- a/dict/templates/index.html +++ b/dict/templates/index.html @@ -22,8 +22,11 @@
- + +
+ jichan.org is in open alpha. The site may be buggy. More features are coming soon. If you have ideas or want to contribute, DM me on Discord @elnudev or open an issue on Codeberg. +
- {{ block "results" . }}{{ if .Query }}{{ template "search" . }}{{ end }}{{ end }} + {{ block "results" . }} + {{ if .Query }} + {{ template "search" . }} + {{ else }} +

Thank you to

+ + {{ end }} + {{ end }}

diff --git a/dict/templates/partials/definition.html b/dict/templates/partials/definition.html index 86595c7..cc826d4 100644 --- a/dict/templates/partials/definition.html +++ b/dict/templates/partials/definition.html @@ -1,3 +1,3 @@ {{- define "definition" -}} -{{ if .PartOfSpeech }}{{ .PartOfSpeech }}
{{ end }}{{ .Definition -}} +{{ if .PartOfSpeech }}{{ .PartOfSpeech }}
{{ end }}{{ highlight .Definition -}} {{ end }} \ No newline at end of file diff --git a/dict/templates/partials/entry.html b/dict/templates/partials/entry.html index 9d74569..0d42249 100644 --- a/dict/templates/partials/entry.html +++ b/dict/templates/partials/entry.html @@ -1,11 +1,15 @@ {{ define "entry" }}
-

- {{- if .Kanji -}} - {{- .Kanji -}}({{- .Reading -}}) - {{- else -}} - {{- .Reading -}} +

+ + {{- range .Furigana -}} + {{- if .Furigana -}} + {{- .Kanji -}}({{- .Furigana -}}) + {{- else -}} + {{- .Kanji -}} + {{- end -}} {{- end -}} +

{{- $count := len .Definitions -}} {{ if eq $count 1 -}} diff --git a/dict/templates/partials/entryfull.html b/dict/templates/partials/entryfull.html index 8488cbc..dce3966 100644 --- a/dict/templates/partials/entryfull.html +++ b/dict/templates/partials/entryfull.html @@ -1,10 +1,12 @@ {{ define "entryfull" }}
-

- {{- if .Kanji -}} - {{- .Kanji -}}({{- .Reading -}}) - {{- else -}} - {{- .Reading -}} +

+ {{- range .Furigana -}} + {{- if .Furigana -}} + {{- .Kanji -}}({{- .Furigana -}}) + {{- else -}} + {{- .Kanji -}} + {{- end -}} {{- end -}}

{{- $count := len .Definitions -}} diff --git a/dict/templates/partials/sitetitle.html b/dict/templates/partials/sitetitle.html index 67e6c0a..72b7d48 100644 --- a/dict/templates/partials/sitetitle.html +++ b/dict/templates/partials/sitetitle.html @@ -1 +1 @@ -{{ define "sitetitle" }}jidict{{ end }} \ No newline at end of file +{{ define "sitetitle" }}jichan.org{{ end }} \ No newline at end of file diff --git a/dict/templates/search.html b/dict/templates/search.html index ffa8680..b143d74 100644 --- a/dict/templates/search.html +++ b/dict/templates/search.html @@ -3,7 +3,7 @@ {{- define "value" }}{{ .Query }}{{- end -}} {{- define "results" -}} -{{- template "entryfull" .Entry -}} +{{- template "search" . -}} {{- end -}} {{- template "index" . -}} \ No newline at end of file diff --git a/httputils/templates.go b/httputils/templates.go index 7c48e1e..2e61d06 100644 --- a/httputils/templates.go +++ b/httputils/templates.go @@ -29,9 +29,11 @@ func newTemplateSet(partials *TemplateSet, paths ...string) TemplateSet { fileInfo, _ := os.Stat(path) modTimes[path] = fileInfo.ModTime() } - templates := template.Must(template.ParseFiles(allPaths...)) + templates := template.Template{} + templates.Funcs(DefaultTemplateFuncs) + templates.ParseFiles(allPaths...) 
return TemplateSet{ - templates: templates, + templates: &templates, paths: allPaths, loadTimes: modTimes, } @@ -46,7 +48,10 @@ func NewTemplateSet(paths ...string) TemplateSet { func (templateSet *TemplateSet) ExecuteTemplate(wr io.Writer, name string, data any) error { templateSet.reloadTemplatesIfModified() - return templateSet.templates.ExecuteTemplate(wr, name, data) + templateSet.templates.Funcs(TemplateFuncs) + err := templateSet.templates.ExecuteTemplate(wr, name, data) + TemplateFuncs = DefaultTemplateFuncs + return err } func (templateSet *TemplateSet) reloadTemplatesIfModified() { @@ -86,3 +91,5 @@ const partialsFolder = templateFolder + "/partials" var paths, _ = getTemplatePathsInDirectory(partialsFolder) var partials = newTemplateSet(nil, paths...) +var DefaultTemplateFuncs template.FuncMap +var TemplateFuncs template.FuncMap