Compare commits

...

4 commits

3 changed files with 141 additions and 53 deletions

View file

@ -1,5 +1,5 @@
use codes_iso_639::part_2::LanguageCode; use codes_iso_639::part_2::LanguageCode;
use serde::{Serialize, Deserialize}; use serde::{Deserialize, Serialize};
use crate::serial::*; use crate::serial::*;
@ -13,8 +13,7 @@ pub struct Jmdict {
#[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)] #[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)]
pub struct JmdictEntry { pub struct JmdictEntry {
pub sequence: i32, pub sequence: i32,
#[serde(deserialize_with = "nullable_list")] pub kanji: Option<Vec<JmdictKanji>>,
pub kanji: Vec<JmdictKanji>,
pub readings: Vec<JmdictReading>, pub readings: Vec<JmdictReading>,
pub sense: Vec<JmdictSense>, pub sense: Vec<JmdictSense>,
} }
@ -48,10 +47,8 @@ pub enum JmdictGlossaryType {
#[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)] #[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)]
pub struct JmdictKanji { pub struct JmdictKanji {
pub expression: String, pub expression: String,
#[serde(deserialize_with = "nullable_list")] pub information: Option<Vec<String>>,
pub information: Vec<String>, pub priorities: Option<Vec<String>>,
#[serde(deserialize_with = "nullable_list")]
pub priorities: Vec<String>,
} }
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
@ -61,44 +58,129 @@ pub struct JmdictReading {
// Always empty with latest version of JMdict // Always empty with latest version of JMdict
#[serde(default, skip_serializing, deserialize_with = "err_if_not_blank")] #[serde(default, skip_serializing, deserialize_with = "err_if_not_blank")]
no_kanji: serde::de::IgnoredAny, no_kanji: serde::de::IgnoredAny,
#[serde(deserialize_with = "nullable_list")] pub restrictions: Option<Vec<String>>,
pub restrictions: Vec<String>, pub information: Option<Vec<String>>,
#[serde(deserialize_with = "nullable_list")] pub priorities: Option<Vec<String>>,
pub information: Vec<String>,
#[serde(deserialize_with = "nullable_list")]
pub priorities: Vec<String>,
} }
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
#[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)] #[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)]
pub struct JmdictSense { pub struct JmdictSense {
#[serde(deserialize_with = "nullable_list")] pub restricted_kanji: Option<Vec<String>>,
pub restricted_kanji: Vec<String>, pub restricted_readings: Option<Vec<String>>,
#[serde(deserialize_with = "nullable_list")] pub references: Option<Vec<String>>,
pub restricted_readings: Vec<String>, pub antonyms: Option<Vec<String>>,
#[serde(deserialize_with = "nullable_list")] pub parts_of_speech: Option<Vec<JmdictSensePartOfSpeech>>,
pub references: Vec<String>, pub fields: Option<Vec<String>>,
#[serde(deserialize_with = "nullable_list")] pub misc: Option<Vec<String>>,
pub antonyms: Vec<String>, pub source_languages: Option<Vec<JmdictSource>>,
#[serde(deserialize_with = "nullable_list")] pub dialects: Option<Vec<String>>,
pub parts_of_speech: Vec<String>, pub information: Option<Vec<String>>,
#[serde(deserialize_with = "nullable_list")] pub glossary: Option<Vec<JmdictGlossary>>,
pub fields: Vec<String>,
#[serde(deserialize_with = "nullable_list")]
pub misc: Vec<String>,
#[serde(deserialize_with = "nullable_list")]
pub source_languages: Vec<JmdictSource>,
#[serde(deserialize_with = "nullable_list")]
pub dialects: Vec<String>,
#[serde(deserialize_with = "nullable_list")]
pub information: Vec<String>,
#[serde(deserialize_with = "nullable_list")]
pub glossary: Vec<JmdictGlossary>,
// Always empty with latest version of JMdict // Always empty with latest version of JMdict
#[serde(default, skip_serializing, deserialize_with = "err_if_not_blank")] #[serde(default, skip_serializing, deserialize_with = "err_if_not_blank")]
examples: serde::de::IgnoredAny, examples: serde::de::IgnoredAny,
} }
// Builds the public `JmdictSensePartOfSpeech` enum from a comma-separated list of
// `Variant, "description"` pairs.
//
// Every description is used twice: verbatim as the variant's serde name (so it is the
// string read and written during (de)serialization) and as the variant's rustdoc text.
macro_rules! JmdictSensePartOfSpeech {
    ( $( $name:ident, $label:expr ),* ) => {
        #[derive(Serialize, Deserialize)]
        pub enum JmdictSensePartOfSpeech {
            $(
                #[doc = $label]
                #[serde(rename = $label)]
                $name,
            )*
        }
    };
}
// Part-of-speech table: each entry is `Variant, "description"`.
// The macro defined above turns every pair into one enum variant whose serde name and
// rustdoc text are the description string, so the strings below are matched verbatim
// when (de)serializing and must not be reworded casually.
// NOTE(review): presumably these strings mirror the part-of-speech descriptions emitted
// by the JMdict source data — confirm against the input before editing any of them.
JmdictSensePartOfSpeech! {
Unclassified, "unclassified",
Noun, "noun (common) (futsuumeishi)",
// NOTE(review): `NounGenative` looks like a misspelling of "Genitive" (the description
// says "genitive"); the identifier is public, so renaming it would be an API change.
NounGenative, "nouns which may take the genitive case particle 'no'",
NounPrefix, "noun, used as a prefix",
NounSuffix, "noun, used as a suffix",
Expressions, "expressions (phrases, clauses, etc.)",
AdjectiveI, "adjective (keiyoushi)",
AdjectiveIYoiIi, "adjective (keiyoushi) - yoi/ii class",
AdjectiveNa, "adjectival nouns or quasi-adjectives (keiyodoshi)",
AdjectiveNaArchaicFormal, "archaic/formal form of na-adjective",
AdjectiveTaru, "'taru' adjective",
AdjectiveShiku, "'shiku' adjective (archaic)",
AdjectivePreNoun, "pre-noun adjectival (rentaishi)",
AdjectiveAuxiliary, "auxiliary adjective",
VerbUnspecified, "verb unspecified", // e.g. 得る
VerbIchidan, "Ichidan verb",
VerbIchidanKureru, "Ichidan verb - kureru special class",
VerbIchidanZuru, "Ichidan verb - zuru verb (alternative form of -jiru verbs)",
VerbSu, "su verb - precursor to the modern suru",
VerbNidanLowerMu, "Nidan verb (lower class) with 'mu' ending (archaic)",
VerbNidanLowerRu, "Nidan verb (lower class) with 'ru' ending (archaic)",
VerbNidanLowerFu, "Nidan verb (lower class) with 'hu/fu' ending (archaic)",
VerbNidanLowerGu, "Nidan verb (lower class) with 'gu' ending (archaic)",
VerbNidanLowerYu, "Nidan verb (lower class) with 'yu' ending (archaic)",
VerbNidanLowerDzu, "Nidan verb (lower class) with 'dzu' ending (archaic)",
VerbNidanLowerKu, "Nidan verb (lower class) with 'ku' ending (archaic)",
VerbNidanLowerSu, "Nidan verb (lower class) with 'su' ending (archaic)",
VerbNidanLowerZu, "Nidan verb (lower class) with 'zu' ending (archaic)",
VerbNidanLowerTsu, "Nidan verb (lower class) with 'tsu' ending (archaic)",
VerbNidanLowerNu, "Nidan verb (lower class) with 'nu' ending (archaic)",
VerbNidanLowerUWe, "Nidan verb (lower class) with 'u' ending and 'we' conjugation (archaic)",
VerbNidanUpperFu, "Nidan verb (upper class) with 'hu/fu' ending (archaic)",
VerbNidanUpperRu, "Nidan verb (upper class) with 'ru' ending (archaic)",
VerbNidanUpperTsu, "Nidan verb (upper class) with 'tsu' ending (archaic)",
VerbNidanUpperYu, "Nidan verb (upper class) with 'yu' ending (archaic)",
VerbNidanUpperKu, "Nidan verb (upper class) with 'ku' ending (archaic)",
VerbNidanUpperGu, "Nidan verb (upper class) with 'gu' ending (archaic)",
VerbNidanUpperBu, "Nidan verb (upper class) with 'bu' ending (archaic)",
VerbNidanU, "Nidan verb with 'u' ending (archaic)",
VerbGodanU, "Godan verb with 'u' ending",
VerbGodanUSpecial, "Godan verb with 'u' ending (special class)",
VerbGodanSu, "Godan verb with 'su' ending",
VerbGodanKu, "Godan verb with 'ku' ending",
VerbGodanRu, "Godan verb with 'ru' ending",
VerbGodanRuIrregular, "Godan verb with 'ru' ending (irregular verb)", // e.g. ある
VerbGodanMu, "Godan verb with 'mu' ending",
VerbGodanGu, "Godan verb with 'gu' ending",
VerbGodanTsu, "Godan verb with 'tsu' ending",
VerbGodanBu, "Godan verb with 'bu' ending",
VerbGodanNu, "Godan verb with 'nu' ending",
VerbGodanAru, "Godan verb - -aru special class", // e.g. いらっしゃる
VerbGodanIkuYuku, "Godan verb - Iku/Yuku special class",
VerbNuIrregular, "irregular nu verb", // e.g. 死ぬ
// NOTE(review): the variant is named `VerbKu` but its description is an adjective class;
// the name is public, so it is left as-is here.
VerbKu, "'ku' adjective (archaic)",
VerbYodanRu, "Yodan verb with 'ru' ending (archaic)",
VerbYodanMu, "Yodan verb with 'mu' ending (archaic)",
VerbYodanSu, "Yodan verb with 'su' ending (archaic)",
VerbYodanKu, "Yodan verb with 'ku' ending (archaic)",
VerbYodanFu, "Yodan verb with 'hu/fu' ending (archaic)",
VerbYodanTsu, "Yodan verb with 'tsu' ending (archaic)",
VerbYodanGu, "Yodan verb with 'gu' ending (archaic)",
VerbYodanBu, "Yodan verb with 'bu' ending (archaic)",
VerbAuxiliary, "auxiliary verb",
VerbTransitive, "transitive verb",
VerbIntransitive, "intransitive verb",
VerbSuru, "suru verb - special class",
VerbSuruNoun, "noun or participle which takes the aux. verb suru",
VerbSuruIncluded, "suru verb - included",
VerbKuru, "Kuru verb - special class",
VerbRuIrregularRi, "irregular ru verb, plain form ends with -ri", // e.g. なり
Pronoun, "pronoun",
Adverb, "adverb (fukushi)",
AdverbTo, "adverb taking the 'to' particle",
Interjection, "interjection (kandoushi)",
Prenominal, "noun or verb acting prenominally", // e.g. ええ
Conjunction, "conjunction",
Particle, "particle",
Auxiliary, "auxiliary",
Copula, "copula", // e.g. である
Prefix, "prefix",
Suffix, "suffix",
Counter, "counter",
Numeric, "numeric"
}
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
#[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)] #[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)]
pub struct JmdictSource { pub struct JmdictSource {

View file

@ -3,27 +3,15 @@ use serde::{Deserialize, Deserializer};
use serde_json::Value; use serde_json::Value;
use std::str::FromStr; use std::str::FromStr;
pub fn nullable_list<'de, D, T>(deserializer: D) -> Result<Vec<T>, D::Error>
where
D: Deserializer<'de>,
T: Deserialize<'de>,
{
// Deserialize the JSON value using Serde's Value type
let value: Value = Deserialize::deserialize(deserializer)?;
// Check if the value is null and return an empty vector in that case
if value.is_null() {
Ok(Vec::new())
} else {
// Otherwise, try to deserialize the value as a Vec<String>
<Vec<T> as serde::Deserialize>::deserialize(value).map_err(serde::de::Error::custom)
}
}
pub fn wasei<'de, D>(deserializer: D) -> Result<bool, D::Error> pub fn wasei<'de, D>(deserializer: D) -> Result<bool, D::Error>
where where
D: Deserializer<'de>, D: Deserializer<'de>,
{ {
if !deserializer.is_human_readable() {
// bincode, will have already been serialized as bool
return Ok(Deserialize::deserialize(deserializer)?);
}
// Deserialize the JSON value using Serde's Value type // Deserialize the JSON value using Serde's Value type
let value: Value = Deserialize::deserialize(deserializer)?; let value: Value = Deserialize::deserialize(deserializer)?;
@ -58,6 +46,11 @@ pub fn lang_default_eng<'de, D>(deserializer: D) -> Result<LanguageCode, D::Erro
where where
D: Deserializer<'de>, D: Deserializer<'de>,
{ {
if !deserializer.is_human_readable() {
// bincode, will have already been serialized as LanguageCode
return Ok(Deserialize::deserialize(deserializer)?);
}
lang_option(deserializer) lang_option(deserializer)
.transpose() .transpose()
.unwrap_or(Ok(LanguageCode::Eng)) .unwrap_or(Ok(LanguageCode::Eng))
@ -67,6 +60,11 @@ pub fn lang_option<'de, D>(deserializer: D) -> Result<Option<LanguageCode>, D::E
where where
D: Deserializer<'de>, D: Deserializer<'de>,
{ {
if !deserializer.is_human_readable() {
// bincode, will have already been serialized as Option<LanguageCode>
return Ok(Deserialize::deserialize(deserializer)?);
}
Option::deserialize(deserializer)? Option::deserialize(deserializer)?
.map(|str: String| parse_language_code(&str)) .map(|str: String| parse_language_code(&str))
.transpose() .transpose()
@ -77,6 +75,11 @@ pub fn err_if_not_blank<'de, D>(deserializer: D) -> Result<serde::de::IgnoredAny
where where
D: Deserializer<'de>, D: Deserializer<'de>,
{ {
if !deserializer.is_human_readable() {
// bincode, will have already been "serialized" as serde::de::IgnoredAny
return Ok(serde::de::IgnoredAny);
}
let value = serde_json::Value::deserialize(deserializer)?; let value = serde_json::Value::deserialize(deserializer)?;
if value.is_null() || value == "" { if value.is_null() || value == "" {
Ok(serde::de::IgnoredAny) Ok(serde::de::IgnoredAny)

View file

@ -1,4 +1,7 @@
use std::{fs::{self, File}, io::{Write, Read}}; use std::{
fs::{self, File},
io::{Read, Write},
};
use crate::jmdict::Jmdict; use crate::jmdict::Jmdict;