package main import ( "bufio" "encoding/gob" "encoding/json" "fmt" "html/template" "log" "net/http" "os" "strconv" "strings" "foosoft.net/projects/jmdict" "git.elnu.com/ElnuDev/jichanorg/httputils" "github.com/gorilla/mux" ) var words []string // since iterating over map isn't the same every time var dict map[string]Entry func LoadDict() error { type binaryData struct { Words []string Dict map[string]Entry } // Loading from binary const binaryFile = "dict.bin" file, err := os.Open(binaryFile) if err != nil { if !os.IsNotExist(err) { return err } } else { defer file.Close() decoder := gob.NewDecoder(file) var data binaryData err = decoder.Decode(&data) words = data.Words dict = data.Dict return err } // Loading from JMdict const jmdictFile = "JMdict.xml" reader, err := os.Open(jmdictFile) if err != nil { return err } jmdict, _, err := jmdict.LoadJmdict(reader) if err != nil { return err } const jmdictFuriganaFile = "JmdictFurigana.txt" reader, err = os.Open(jmdictFuriganaFile) if err != nil { return err } scanner := bufio.NewScanner(reader) furiganaData := make(map[string]string) for scanner.Scan() { params := strings.Split(scanner.Text(), "|") // We need to include the reading as well since some words have the same kanji furiganaData[fmt.Sprintf("%s|%s", params[0], params[1])] = params[2] } words = make([]string, len(jmdict.Entries)) dict = make(map[string]Entry) for i, jmdictEntry := range jmdict.Entries { // お願い致します|おねがいいたします|1:ねが;3:いた var furiganaInfo *string if len(jmdictEntry.Kanji) > 0 { data := furiganaData[fmt.Sprintf("%s|%s", jmdictEntry.Kanji[0].Expression, jmdictEntry.Readings[0].Reading)] furiganaInfo = &data } else { furiganaInfo = nil } entry := ParseEntry(&jmdictEntry, furiganaInfo) offset := 0 getKey := func() string { if offset == 0 { // unique return entry.Kanji } else { return fmt.Sprintf("%s-%d", entry.Kanji, offset) } } for { if _, ok := dict[getKey()]; ok { offset++ } else { break } } key := getKey() entry.Key = key words[i] = key dict[key] = entry } // Encoding to binary file, err = os.Create(binaryFile) if err != nil { return err } defer file.Close() encoder := gob.NewEncoder(file) data := binaryData{ Words: words, Dict: dict, } err = encoder.Encode(&data) if err != nil { return err } return nil } type Furigana struct { Kanji string Furigana string } type Entry struct { Key string Kanji string // Mapping of character index to furigana Furigana []Furigana Reading string Definitions []Definition } type Definition struct { Definition string PartOfSpeech []PartOfSpeech } type PartOfSpeech struct { Description string Type string } func ParseEntry(entry *jmdict.JmdictEntry, furiganaInfo *string) Entry { kanji := "" if len(entry.Kanji) > 0 { kanji = entry.Kanji[0].Expression } else { kanji = entry.Readings[0].Reading } reading := "" if kanji != "" { reading = entry.Readings[0].Reading } var definitions []Definition definitions = make([]Definition, len(entry.Sense)) for i, sense := range entry.Sense { definition := sense.Glossary[0].Content if len(sense.Glossary) > 1 { for _, glossary := range sense.Glossary[1:] { definition += "; " + glossary.Content } } pos := make([]PartOfSpeech, len(sense.PartsOfSpeech)) for i, posFull := range sense.PartsOfSpeech { class := "" if strings.Contains(posFull, "noun") { class = "noun" } else if strings.Contains(posFull, "adjective") { class = "adjective" } else if strings.Contains(posFull, "adverb") { class = "adverb" } else if strings.Contains(posFull, "verb") { class = "verb" } else if strings.Contains(posFull, "particle") { class = "particle" } else if strings.Contains(posFull, "phrase") { class = "phrase" } pos[i] = PartOfSpeech{ Description: posFull, Type: class, } } definitions[i] = Definition{ Definition: definition, PartOfSpeech: pos, } } // 1:ねが;3:いた var furiganaList []Furigana if reading == "" || furiganaInfo == nil || *furiganaInfo == "" { furiganaList = []Furigana{{Kanji: reading, Furigana: ""}} } else { furiganaEntries := strings.Split(*furiganaInfo, ";") // ["1:ねが", "3:いた"] type rawFurigana struct { from int to int furigana string } ruby := make([]rawFurigana, 0) for _, entry := range furiganaEntries { // 1:ねが // multiple: 0-1:きょう params := strings.Split(entry, ":") // ["1", "ねが"] // multiple: ["0-1", "きょう"] indexRange := strings.Split(params[0], "-") // [1] // multiple: [0, 1] var from, to int if len(indexRange) == 1 { index, _ := strconv.Atoi(indexRange[0]) from, to = index, index } else { from, _ = strconv.Atoi(indexRange[0]) to, _ = strconv.Atoi(indexRange[1]) } ruby = append(ruby, rawFurigana{ from: from, to: to, furigana: params[1], }) } furiganaList = make([]Furigana, 0) slice := func(from, to int) string { return string([]rune(kanji)[from : to+1]) } nextIndex := 0 for _, raw := range ruby { if raw.from > nextIndex { furiganaList = append(furiganaList, Furigana{ Kanji: slice(nextIndex, raw.from-1), Furigana: "", }) } furiganaList = append(furiganaList, Furigana{ Kanji: slice(raw.from, raw.to), Furigana: raw.furigana, }) nextIndex = raw.to + 1 } length := len([]rune(kanji)) if nextIndex < length { furiganaList = append(furiganaList, Furigana{ Kanji: slice(nextIndex, length-1), Furigana: "", }) } } return Entry{ Kanji: kanji, Furigana: furiganaList, Reading: reading, Definitions: definitions, } } func highlight(input, substring string) template.HTML { // Replace all occurrences of substring with the highlighted version replacement := fmt.Sprintf("%s", substring) result := strings.ReplaceAll(input, substring, replacement) return template.HTML(result) } func Search(query string) queryResult { query = strings.TrimSpace(query) exactResults := make([]Entry, 0) otherResults := make([]Entry, 0) truncated := false count := 0 for _, key := range words { exactMatch := false entry := dict[key] if entry.Kanji == query { exactMatch = true goto match } if strings.Contains(entry.Kanji, query) { goto match } for _, definition := range entry.Definitions { if strings.Contains(strings.ToLower(definition.Definition), strings.ToLower(strings.TrimSpace(query))) { goto match } } // TODO: Skip if query contains kanji if strings.Contains(entry.Reading, query) { goto match } continue match: if exactMatch { exactResults = append(exactResults, entry) } else { otherResults = append(otherResults, entry) } count++ if count >= 500 { truncated = true break } } return queryResult{ Query: query, ExactResults: exactResults, OtherResults: otherResults, Truncated: truncated, Count: len(exactResults) + len(otherResults), } } func Lookup(word string) *Entry { for kanji, entry := range dict { if kanji == word { return &entry } } return nil } type queryResult struct { // Fields must be capitalized // to be accessible in templates Query string ExactResults []Entry OtherResults []Entry Truncated bool Count int } func main() { err := LoadDict() if err != nil { fmt.Println(err) return } fmt.Println("JMdict loaded!") httputils.DefaultTemplateFuncs = template.FuncMap{ "highlight": func(input string) string { return input }, } httputils.TemplateFuncs = httputils.DefaultTemplateFuncs r := mux.NewRouter() r.HandleFunc("/", httputils.GenerateHandler( func(w http.ResponseWriter, r *http.Request) bool { return true }, httputils.NewTemplateSet("index.html"), func(w http.ResponseWriter, r *http.Request) (string, any) { return "index.html", nil }, []string{http.MethodGet}, )) rawSearchHandler := func(w http.ResponseWriter, r *http.Request) { r.ParseMultipartForm(0) q := r.FormValue("q") var redirect string if q == "" { redirect = "/" } else { redirect = "/search/" + q } http.Redirect(w, r, redirect, http.StatusMovedPermanently) } r.HandleFunc("/search", rawSearchHandler) r.HandleFunc("/search/", rawSearchHandler) r.HandleFunc("/search/{query}", httputils.GenerateHandler( // handler whether or not to use template func(w http.ResponseWriter, r *http.Request) bool { // If Accept: applicaiton/json we'll use the template if r.Header.Get("Accept") != "application/json" { return true } // Otherwise, let's send JSON query := mux.Vars(r)["query"] result := Search(query) jsonBytes, _ := json.Marshal(append(result.ExactResults, result.OtherResults...)) w.Header().Set("Content-Type", "application/json; charset=utf-8") fmt.Fprint(w, string(jsonBytes)) return false }, httputils.NewTemplateSet("index.html", "search.html"), // template data func(w http.ResponseWriter, r *http.Request) (templateName string, data any) { if r.Header.Get("HX-Request") == "" { templateName = "search.html" } else { templateName = "search" } // Only runs if handler returns true query := mux.Vars(r)["query"] data = Search(query) httputils.TemplateFuncs = template.FuncMap{ "highlight": func(input string) template.HTML { return highlight(input, strings.TrimSpace(query)) }, } return }, []string{http.MethodGet}, )) rawWordHandler := func(w http.ResponseWriter, r *http.Request) { fmt.Println("Redirecting raw word handler") http.Redirect(w, r, "/", http.StatusMovedPermanently) } r.HandleFunc("/word", rawWordHandler) r.HandleFunc("/word/", rawWordHandler) r.HandleFunc("/word/{word}", httputils.GenerateHandler( func(w http.ResponseWriter, r *http.Request) bool { return true }, // Order matters // word.html overrided the results block in index.html // so should be loaded second httputils.NewTemplateSet("index.html", "word.html"), func(w http.ResponseWriter, r *http.Request) (template string, data any) { template = "word.html" query := mux.Vars(r)["word"] data = struct { Query any Entry *Entry }{ Query: nil, Entry: Lookup(query), } return }, []string{http.MethodGet}, )) r.PathPrefix("/").Handler(http.FileServer(http.Dir("static"))) log.Fatal(http.ListenAndServe(":3334", r)) }