reset history

There is about four weeks' worth of history, the interesting parts of
which I've documented in `CONTRIBUTING.md`. I'm now throwing this
history away because there is a lot of messing with data files in there
that bloats the repo unnecessarily, and this is my last chance to get
rid of that bloat before other people start pulling it.
This commit is contained in:
Stefan Majewsky 2021-04-18 14:13:28 +02:00
commit 5ceeec3acc
26 changed files with 195162 additions and 0 deletions

26
jmdict-enums/Cargo.toml Normal file
View file

@ -0,0 +1,26 @@
[package]
name = "jmdict-enums"
version = "0.1.0"
authors = ["Stefan Majewsky <majewsky@gmx.net>"]
edition = "2018"
description = "Autogenerated enums for the jmdict crate. Do not import directly."
readme = "README.md"
homepage = "https://github.com/majewsky/rust-jmdict/tree/main/jmdict-enums"
license = "Apache-2.0"
[dependencies]
[build-dependencies]
json = "^0.12.0"
[features]
scope-archaic = []
translations-eng = []
translations-dut = []
translations-fre = []
translations-ger = []
translations-hun = []
translations-rus = []
translations-slv = []
translations-spa = []
translations-swe = []

12
jmdict-enums/README.md Normal file
View file

@ -0,0 +1,12 @@
# jmdict-enums
Autogenerated enums for the `jmdict` crate.
This code is in a separate crate because, if we put it in the `jmdict` crate itself, its `build.rs`
could not import it.
## Compatibility promise
**There is none.** This crate can disappear at any time if we choose to restructure the build system
for the `jmdict` crate. To use the types from this crate, look at the re-exports of the same name in
[the `jmdict` crate](https://docs.rs/jmdict/).

514
jmdict-enums/build.rs Normal file
View file

@ -0,0 +1,514 @@
/*******************************************************************************
* Copyright 2021 Stefan Majewsky <majewsky@gmx.net>
* SPDX-License-Identifier: Apache-2.0
* Refer to the file "LICENSE" for details.
*******************************************************************************/
use json::JsonValue;
///A single variant of an enum that this build script generates.
struct EnumVariant {
    ///String that identifies the variant in the dictionary data, e.g. `"adj-i"`.
    code: &'static str,
    ///Identifier of the variant in the generated Rust enum, e.g. `"Adjective"`.
    name: &'static str,
    ///Whether the variant gets rendered into the regular (non-"All") enum.
    enabled: bool,
}

///Shorthand for declaring a variant. Variants start out enabled; chain `.when(...)` to make
///them conditional.
fn v(code: &'static str, name: &'static str) -> EnumVariant {
    EnumVariant {
        enabled: true,
        name,
        code,
    }
}

impl EnumVariant {
    ///Returns this variant with its `enabled` flag replaced by the given value.
    fn when(mut self, enabled: bool) -> Self {
        self.enabled = enabled;
        self
    }
}
///Description of one enum for which `process()` renders Rust source code.
struct Enum<'a> {
///Name of the generated enum type.
name: &'static str,
///If given, `process()` additionally renders an enum of this name that contains all variants
///(including disabled ones), plus conversions between both enums.
all_name: Option<&'static str>,
///Text of the doc comment that is placed on the generated enum.
doc: String,
///If given, a JSON value that is indexed by variant code to obtain the text of the doc
///comment for each variant.
entities: Option<&'a JsonValue>,
///All variants of the enum, including those that are not enabled.
variants: Vec<EnumVariant>,
}
fn main() {
println!("cargo:rerun-if-changed=build.rs");
println!("cargo:rerun-if-changed=data/entities.json");
let entities_str = std::fs::read_to_string("data/entities.json").unwrap();
let entities = json::parse(&entities_str).unwrap();
let mut content = String::new();
content.push_str(&process(Enum {
name: "Dialect",
all_name: None,
doc: "Dialect of Japanese in which a certain vocabulary occurs.".into(),
entities: Some(&entities["dial"]),
variants: vec![
v("hob", "Hokkaido"),
v("ksb", "Kansai"),
v("ktb", "Kantou"),
v("kyb", "Kyoto"),
v("kyu", "Kyuushuu"),
v("nab", "Nagano"),
v("osb", "Osaka"),
v("rkb", "Ryuukyuu"),
v("thb", "Touhoku"),
v("tsb", "Tosa"),
v("tsug", "Tsugaru"),
],
}));
content.push_str(&process(Enum {
name: "GlossLanguage",
all_name: Some("AllGlossLanguage"),
doc: "The language of a particular Gloss.".into(),
entities: None,
variants: vec![
v("eng", "English").when(cfg!(feature = "translations-eng")),
v("dut", "Dutch").when(cfg!(feature = "translations-dut")),
v("fre", "French").when(cfg!(feature = "translations-fre")),
v("ger", "German").when(cfg!(feature = "translations-ger")),
v("hun", "Hungarian").when(cfg!(feature = "translations-hun")),
v("rus", "Russian").when(cfg!(feature = "translations-rus")),
v("slv", "Slovenian").when(cfg!(feature = "translations-slv")),
v("spa", "Spanish").when(cfg!(feature = "translations-spa")),
v("swe", "Swedish").when(cfg!(feature = "translations-swe")),
],
}));
content.push_str(&process(Enum {
name: "GlossType",
all_name: None,
doc: "Type of gloss.".into(),
entities: None,
variants: vec![
v("", "RegularTranslation"),
v("expl", "Explanation"),
v("fig", "FigurativeSpeech"),
v("lit", "LiteralTranslation"),
],
}));
content.push_str(&process(Enum {
name: "KanjiInfo",
all_name: None,
doc: "Information regarding a certain KanjiElement.".into(),
entities: Some(&entities["ke_inf"]),
variants: vec![
v("ateji", "Ateji"),
v("iK", "IrregularKanjiUsage"),
v("ik", "IrregularKanaUsage"),
v("io", "IrregularOkuriganaUsage"),
v("oK", "OutdatedKanji"),
],
}));
content.push_str(&process(Enum {
name: "PartOfSpeech",
all_name: Some("AllPartOfSpeech"),
doc: "Where a word can appear in a sentence for a particular Sense of the word.".into(),
entities: Some(&entities["pos"]),
variants: vec![
v("adj-f", "NounOrVerbActingPrenominally"),
v("adj-i", "Adjective"),
v("adj-ix", "YoiAdjective"),
v("adj-kari", "KariAdjective").when(cfg!(feature = "scope-archaic")),
v("adj-ku", "KuAdjective").when(cfg!(feature = "scope-archaic")),
v("adj-na", "AdjectivalNoun"),
v("adj-nari", "NariAdjective").when(cfg!(feature = "scope-archaic")),
v("adj-no", "NoAdjective"),
v("adj-pn", "PreNounAdjectival"),
v("adj-shiku", "ShikuAdjective").when(cfg!(feature = "scope-archaic")),
v("adj-t", "TaruAdjective"),
v("adv", "Adverb"),
v("adv-to", "AdverbTakingToParticle"),
v("aux", "Auxiliary"),
v("aux-adj", "AuxiliaryAdjective"),
v("aux-v", "AuxiliaryVerb"),
v("conj", "Conjunction"),
v("cop", "Copula"),
v("ctr", "Counter"),
v("exp", "Expression"),
v("int", "Interjection"),
v("n", "CommonNoun"),
v("n-adv", "AdverbialNoun"),
v("n-pr", "ProperNoun"),
v("n-pref", "NounPrefix"),
v("n-suf", "NounSuffix"),
v("n-t", "TemporalNoun"),
v("num", "Numeric"),
v("pn", "Pronoun"),
v("pref", "Prefix"),
v("prt", "Particle"),
v("suf", "Suffix"),
v("unc", "Unclassified"),
v("v-unspec", "UnspecifiedVerb"),
v("v1", "IchidanVerb"),
v("v1-s", "IchidanKureruVerb"),
v("v2a-s", "NidanUVerb").when(cfg!(feature = "scope-archaic")),
v("v2b-k", "UpperNidanBuVerb").when(cfg!(feature = "scope-archaic")),
v("v2b-s", "LowerNidanBuVerb").when(cfg!(feature = "scope-archaic")),
v("v2d-k", "UpperNidanDzuVerb").when(cfg!(feature = "scope-archaic")),
v("v2d-s", "LowerNidanDzuVerb").when(cfg!(feature = "scope-archaic")),
v("v2g-k", "UpperNidanGuVerb").when(cfg!(feature = "scope-archaic")),
v("v2g-s", "LowerNidanGuVerb").when(cfg!(feature = "scope-archaic")),
v("v2h-k", "UpperNidanFuVerb").when(cfg!(feature = "scope-archaic")),
v("v2h-s", "LowerNidanFuVerb").when(cfg!(feature = "scope-archaic")),
v("v2k-k", "UpperNidanKuVerb").when(cfg!(feature = "scope-archaic")),
v("v2k-s", "LowerNidanKuVerb").when(cfg!(feature = "scope-archaic")),
v("v2m-k", "UpperNidanMuVerb").when(cfg!(feature = "scope-archaic")),
v("v2m-s", "LowerNidanMuVerb").when(cfg!(feature = "scope-archaic")),
v("v2n-s", "LowerNidanNuVerb").when(cfg!(feature = "scope-archaic")),
v("v2r-k", "UpperNidanRuVerb").when(cfg!(feature = "scope-archaic")),
v("v2r-s", "LowerNidanRuVerb").when(cfg!(feature = "scope-archaic")),
v("v2s-s", "LowerNidanSuVerb").when(cfg!(feature = "scope-archaic")),
v("v2t-k", "UpperNidanTsuVerb").when(cfg!(feature = "scope-archaic")),
v("v2t-s", "LowerNidanTsuVerb").when(cfg!(feature = "scope-archaic")),
v("v2w-s", "LowerNidanUWeVerb").when(cfg!(feature = "scope-archaic")),
v("v2y-k", "UpperNidanYuVerb").when(cfg!(feature = "scope-archaic")),
v("v2y-s", "LowerNidanYuVerb").when(cfg!(feature = "scope-archaic")),
v("v2z-s", "LowerNidanZuVerb").when(cfg!(feature = "scope-archaic")),
v("v4b", "YodanBuVerb").when(cfg!(feature = "scope-archaic")),
v("v4g", "YodanGuVerb").when(cfg!(feature = "scope-archaic")),
v("v4h", "YodanFuVerb").when(cfg!(feature = "scope-archaic")),
v("v4k", "YodanKuVerb").when(cfg!(feature = "scope-archaic")),
v("v4m", "YodanMuVerb").when(cfg!(feature = "scope-archaic")),
v("v4n", "YodanNuVerb").when(cfg!(feature = "scope-archaic")),
v("v4r", "YodanRuVerb").when(cfg!(feature = "scope-archaic")),
v("v4s", "YodanSuVerb").when(cfg!(feature = "scope-archaic")),
v("v4t", "YodanTsuVerb").when(cfg!(feature = "scope-archaic")),
v("v5aru", "GodanAruVerb"),
v("v5b", "GodanBuVerb"),
v("v5g", "GodanGuVerb"),
v("v5k", "GodanKuVerb"),
v("v5k-s", "GodanIkuVerb"),
v("v5m", "GodanMuVerb"),
v("v5n", "GodanNuVerb"),
v("v5r", "GodanRuVerb"),
v("v5r-i", "IrregularGodanRuVerb"),
v("v5s", "GodanSuVerb"),
v("v5t", "GodanTsuVerb"),
v("v5u", "GodanUVerb"),
v("v5u-s", "IrregularGodanUVerb"),
v("vi", "IntransitiveVerb"),
v("vk", "KuruVerb"),
v("vn", "IrregularGodanNuVerb"),
v("vr", "IrregularGodanRuVerbWithPlainRiForm"),
v("vs", "SuruVerb"),
v("vs-c", "SuruPrecursorVerb"),
v("vs-i", "IncludedSuruVerb"),
v("vs-s", "SpecialSuruVerb"),
v("vt", "TransitiveVerb"),
v("vz", "IchidanZuruVerb"),
],
}));
content.push_str(&process(Enum {
name: "ReadingInfo",
all_name: None,
doc: "Information regarding a certain ReadingElement.".into(),
entities: Some(&entities["re_inf"]),
variants: vec![
v("gikun", "GikunOrJukujikun"),
v("ik", "IrregularKanaUsage"),
v("ok", "OutdatedKanaUsage"),
v("uK", "UsuallyWrittenUsingKanjiAlone"),
],
}));
content.push_str(&process(Enum {
name: "SenseInfo",
all_name: None,
doc: "Information regarding a certain Sense.".into(),
entities: Some(&entities["misc"]),
variants: vec![
v("X", "XRated"),
v("abbr", "Abbreviation"),
v("arch", "Archaism"),
v("char", "Character"),
v("chn", "ChildrensLanguage"),
v("col", "Colloquialism"),
v("company", "CompanyName"),
v("creat", "Creature"),
v("dated", "DatedTerm"),
v("dei", "Deity"),
v("derog", "Derogatory"),
v("ev", "Event"),
v("fam", "FamiliarLanguage"),
v("fem", "FemaleTermOrLanguage"),
v("fict", "Fiction"),
v("given", "GivenName"),
v("hist", "HistoricalTerm"),
v("hon", "HonorificLanguage"),
v("hum", "HumbleLanguage"),
v("id", "IdiomaticExpression"),
v("joc", "JocularTerm"),
v("leg", "Legend"),
v("litf", "LiteraryOrFormalTerm"),
v("m-sl", "MangaSlang"),
v("male", "MaleTermOrLanguage"),
v("myth", "Mythology"),
v("net-sl", "InternetSlang"),
v("obj", "Object"),
v("obs", "ObsoleteTerm"),
v("obsc", "ObscureTerm"),
v("on-mim", "Onomatopoeia"),
v("organization", "OrganizationName"),
v("oth", "Other"),
v("person", "PersonName"),
v("place", "PlaceName"),
v("poet", "PoeticalTerm"),
v("pol", "PoliteLanguage"),
v("product", "ProductName"),
v("proverb", "Proverb"),
v("quote", "Quotation"),
v("rare", "Rare"),
v("relig", "Religion"),
v("sens", "Sensitive"),
v("serv", "Service"),
v("sl", "Slang"),
v("station", "RailwayStation"),
v("surname", "Surname"),
v("uk", "UsuallyWrittenUsingKanaAlone"),
v("unclass", "UnclassifiedName"),
v("vulg", "VulgarTerm"),
v("work", "WorkOfArt"),
v("yoji", "Yojijukugo"),
],
}));
content.push_str(&process(Enum {
name: "SenseTopic",
all_name: None,
doc: "Field of study where a certain Sense originates.".into(),
entities: Some(&entities["field"]),
variants: vec![
v("Buddh", "Buddhism"),
v("Christn", "Christianity"),
v("MA", "MartialArts"),
v("Shinto", "Shinto"),
v("agric", "Agriculture"),
v("anat", "Anatomy"),
v("archeol", "Archeology"),
v("archit", "Architecture"),
v("art", "Art"),
v("astron", "Astronomy"),
v("audvid", "AudioVisual"),
v("aviat", "Aviation"),
v("baseb", "Baseball"),
v("biochem", "Biochemistry"),
v("biol", "Biology"),
v("bot", "Botany"),
v("bus", "Business"),
v("chem", "Chemistry"),
v("comp", "Computing"),
v("cryst", "Crystallography"),
v("ecol", "Ecology"),
v("econ", "Economics"),
v("elec", "ElectricalEngineering"),
v("electr", "Electronics"),
v("embryo", "Embryology"),
v("engr", "Engineering"),
v("ent", "Entomology"),
v("finc", "Finance"),
v("fish", "Fishing"),
v("food", "Food"),
v("gardn", "Gardening"),
v("genet", "Genetics"),
v("geogr", "Geography"),
v("geol", "Geology"),
v("geom", "Geometry"),
v("go", "Go"),
v("golf", "Golf"),
v("gramm", "Grammar"),
v("grmyth", "GreekMythology"),
v("hanaf", "Hanafuda"),
v("horse", "Horseracing"),
v("law", "Law"),
v("ling", "Linguistics"),
v("logic", "Logic"),
v("mahj", "Mahjong"),
v("math", "Mathematics"),
v("mech", "MechanicalEngineering"),
v("med", "Medicine"),
v("met", "Meteorology"),
v("mil", "Military"),
v("music", "Music"),
v("ornith", "Ornithology"),
v("paleo", "Paleontology"),
v("pathol", "Pathology"),
v("pharm", "Pharmacy"),
v("phil", "Philosophy"),
v("photo", "Photography"),
v("physics", "Physics"),
v("physiol", "Physiology"),
v("print", "Printing"),
v("psych", "Psychology"),
v("shogi", "Shogi"),
v("sports", "Sports"),
v("stat", "Statistics"),
v("sumo", "Sumo"),
v("telec", "Telecommunications"),
v("tradem", "Trademark"),
v("vidg", "VideoGame"),
v("zool", "Zoology"),
],
}));
let out_dir = std::env::var_os("OUT_DIR").unwrap();
let dest_path = std::path::Path::new(&out_dir).join("generated.rs");
std::fs::write(&dest_path, content).unwrap();
}
fn process(e: Enum) -> String {
let mut lines = vec![];
//render the corresponding fully-populated enum, if requested
if let Some(all_name) = e.all_name {
lines.push(process(Enum {
name: all_name,
all_name: None,
doc: format!("{} This enum contains all possible variants, including those that have been disabled by compile-time flags in `enum {}`.", e.doc, e.name),
entities: e.entities,
variants: e.variants.iter().map(|v| EnumVariant{enabled: true, ..*v}).collect(),
}));
}
//enum declaration
lines.push(format!("/// {}", e.doc));
lines.push("#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]".into());
lines.push(format!("pub enum {} {{", e.name));
for v in e.variants.iter().filter(|v| v.enabled) {
if let Some(ref entities) = e.entities {
lines.push(format!(" ///{}", entities[v.code].as_str().unwrap()));
}
lines.push(format!(" {},", v.name));
}
lines.push("}\n".into());
//start impl Enum
lines.push(format!("impl Enum for {} {{", e.name));
//fn code(&self) -> &str
lines.push(" fn code(&self) -> &'static str {".into());
lines.push(" match *self {".into());
for v in e.variants.iter().filter(|v| v.enabled) {
lines.push(format!(
" {}::{} => \"{}\",",
e.name, v.name, v.code
));
}
lines.push(" }".into());
lines.push(" }\n".into());
//fn constant_name(&self) -> &str
lines.push(" fn constant_name(&self) -> &'static str {".into());
lines.push(" match *self {".into());
for v in e.variants.iter().filter(|v| v.enabled) {
lines.push(format!(
" {}::{} => \"{}\",",
e.name, v.name, v.name
));
}
lines.push(" }".into());
lines.push(" }\n".into());
//fn from_code(&str) -> Self
lines.push(" fn from_code(text: &str) -> Option<Self> {".into());
lines.push(" match text {".into());
for v in e.variants.iter().filter(|v| v.enabled) {
lines.push(format!(
" \"{}\" => Some({}::{}),",
v.code, e.name, v.name
));
}
lines.push(" _ => None,".into());
lines.push(" }".into());
lines.push(" }\n".into());
//end impl Enum
lines.push("}\n".into());
//impl Display
lines.push(format!("impl std::fmt::Display for {} {{", e.name));
lines.push(" fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {".into());
lines.push(" write!(f, \"{}\", self.constant_name())".into());
lines.push(" }".into());
lines.push("}\n".into());
//impl EnumPayload
lines.push(format!("impl EnumPayload for {} {{", e.name));
lines.push(" fn to_u32(&self) -> u32 {".into());
lines.push(" match *self {".into());
for (idx, v) in e.variants.iter().filter(|v| v.enabled).enumerate() {
lines.push(format!(" {}::{} => {},", e.name, v.name, idx));
}
lines.push(" }".into());
lines.push(" }\n".into());
lines.push(" fn from_u32(code: u32) -> Self {".into());
lines.push(" match code {".into());
for (idx, v) in e.variants.iter().filter(|v| v.enabled).enumerate() {
lines.push(format!(" {} => {}::{},", idx, e.name, v.name));
}
lines.push(format!(
" _ => panic!(\"unknown {} code: {{}}\", code),",
e.name
));
lines.push(" }".into());
lines.push(" }".into());
lines.push("}\n".into());
if let Some(all_name) = e.all_name {
//impl TryFrom
lines.push(format!(
"impl std::convert::TryFrom<{}> for {} {{",
all_name, e.name
));
lines.push(" type Error = DisabledVariant;".into());
lines.push(format!(
" fn try_from(value: {}) -> Result<{}, DisabledVariant> {{",
all_name, e.name,
));
lines.push(" match value {".into());
for v in e.variants.iter() {
if v.enabled {
lines.push(format!(
" {}::{} => Ok({}::{}),",
all_name, v.name, e.name, v.name
));
} else {
lines.push(format!(
" {}::{} => Err(DisabledVariant),",
all_name, v.name
));
}
}
lines.push(" }".into());
lines.push(" }".into());
lines.push("}\n".into());
//impl From
lines.push(format!(
"impl std::convert::From<{}> for {} {{",
e.name, all_name
));
lines.push(format!(" fn from(value: {}) -> {} {{", e.name, all_name));
lines.push(" match value {".into());
for v in e.variants.iter().filter(|v| v.enabled) {
lines.push(format!(
" {}::{} => {}::{},",
e.name, v.name, all_name, v.name
));
}
lines.push(" }".into());
lines.push(" }".into());
lines.push("}\n".into());
}
lines.join("\n")
}

View file

@ -0,0 +1,247 @@
{
"dial": {
"hob": "Hokkaido-ben",
"ksb": "Kansai-ben",
"ktb": "Kantou-ben",
"kyb": "Kyoto-ben",
"kyu": "Kyuushuu-ben",
"nab": "Nagano-ben",
"osb": "Osaka-ben",
"rkb": "Ryuukyuu-ben",
"thb": "Touhoku-ben",
"tsb": "Tosa-ben",
"tsug": "Tsugaru-ben"
},
"field": {
"Buddh": "Buddhism",
"Christn": "Christianity",
"MA": "martial arts",
"Shinto": "Shinto",
"agric": "agriculture",
"anat": "anatomy",
"archeol": "archeology",
"archit": "architecture, building",
"art": "art, aesthetics",
"astron": "astronomy",
"audvid": "audio-visual",
"aviat": "aviation",
"baseb": "baseball",
"biochem": "biochemistry",
"biol": "biology",
"bot": "botany",
"bus": "business",
"chem": "chemistry",
"comp": "computing",
"cryst": "crystallography",
"ecol": "ecology",
"econ": "economics",
"elec": "electricity, elec. eng.",
"electr": "electronics",
"embryo": "embryology",
"engr": "engineering",
"ent": "entomology",
"finc": "finance",
"fish": "fishing",
"food": "food, cooking",
"gardn": "gardening, horticulture",
"genet": "genetics",
"geogr": "geography",
"geol": "geology",
"geom": "geometry",
"go": "go (game)",
"golf": "golf",
"gramm": "grammar",
"grmyth": "Greek mythology",
"hanaf": "hanafuda",
"horse": "horse-racing",
"law": "law",
"ling": "linguistics",
"logic": "logic",
"mahj": "mahjong",
"math": "mathematics",
"mech": "mechanical engineering",
"med": "medicine",
"met": "climate, weather",
"mil": "military",
"music": "music",
"ornith": "ornithology",
"paleo": "paleontology",
"pathol": "pathology",
"pharm": "pharmacy",
"phil": "philosophy",
"photo": "photography",
"physics": "physics",
"physiol": "physiology",
"print": "printing",
"psych": "psychology, psychiatry",
"shogi": "shogi",
"sports": "sports",
"stat": "statistics",
"sumo": "sumo",
"telec": "telecommunications",
"tradem": "trademark",
"vidg": "video game",
"zool": "zoology"
},
"ke_inf": {
"ateji": "ateji (phonetic) reading",
"iK": "word containing irregular kanji usage",
"ik": "word containing irregular kana usage",
"io": "irregular okurigana usage",
"oK": "word containing out-dated kanji"
},
"misc": {
"X": "rude or X-rated term (not displayed in educational software)",
"abbr": "abbreviation",
"arch": "archaism",
"char": "character",
"chn": "children's language",
"col": "colloquialism",
"company": "company name",
"creat": "creature",
"dated": "dated term",
"dei": "deity",
"derog": "derogatory",
"ev": "event",
"fam": "familiar language",
"fem": "female term or language",
"fict": "fiction",
"given": "given name or forename, gender not specified",
"hist": "historical term",
"hon": "honorific or respectful (sonkeigo) language",
"hum": "humble (kenjougo) language",
"id": "idiomatic expression",
"joc": "jocular, humorous term",
"leg": "legend",
"litf": "literary or formal term",
"m-sl": "manga slang",
"male": "male term or language",
"myth": "mythology",
"net-sl": "Internet slang",
"obj": "object",
"obs": "obsolete term",
"obsc": "obscure term",
"on-mim": "onomatopoeic or mimetic word",
"organization": "organization name",
"oth": "other",
"person": "full name of a particular person",
"place": "place name",
"poet": "poetical term",
"pol": "polite (teineigo) language",
"product": "product name",
"proverb": "proverb",
"quote": "quotation",
"rare": "rare",
"relig": "religion",
"sens": "sensitive",
"serv": "service",
"sl": "slang",
"station": "railway station",
"surname": "family or surname",
"uk": "word usually written using kana alone",
"unclass": "unclassified name",
"vulg": "vulgar expression or word",
"work": "work of art, literature, music, etc. name",
"yoji": "yojijukugo"
},
"pos": {
"adj-f": "noun or verb acting prenominally",
"adj-i": "adjective (keiyoushi)",
"adj-ix": "adjective (keiyoushi) - yoi/ii class",
"adj-kari": "'kari' adjective (archaic)",
"adj-ku": "'ku' adjective (archaic)",
"adj-na": "adjectival nouns or quasi-adjectives (keiyodoshi)",
"adj-nari": "archaic/formal form of na-adjective",
"adj-no": "nouns which may take the genitive case particle 'no'",
"adj-pn": "pre-noun adjectival (rentaishi)",
"adj-shiku": "'shiku' adjective (archaic)",
"adj-t": "'taru' adjective",
"adv": "adverb (fukushi)",
"adv-to": "adverb taking the 'to' particle",
"aux": "auxiliary",
"aux-adj": "auxiliary adjective",
"aux-v": "auxiliary verb",
"conj": "conjunction",
"cop": "copula",
"ctr": "counter",
"exp": "expressions (phrases, clauses, etc.)",
"int": "interjection (kandoushi)",
"n": "noun (common) (futsuumeishi)",
"n-adv": "adverbial noun (fukushitekimeishi)",
"n-pr": "proper noun",
"n-pref": "noun, used as a prefix",
"n-suf": "noun, used as a suffix",
"n-t": "noun (temporal) (jisoumeishi)",
"num": "numeric",
"pn": "pronoun",
"pref": "prefix",
"prt": "particle",
"suf": "suffix",
"unc": "unclassified",
"v-unspec": "verb unspecified",
"v1": "Ichidan verb",
"v1-s": "Ichidan verb - kureru special class",
"v2a-s": "Nidan verb with 'u' ending (archaic)",
"v2b-k": "Nidan verb (upper class) with 'bu' ending (archaic)",
"v2b-s": "Nidan verb (lower class) with 'bu' ending (archaic)",
"v2d-k": "Nidan verb (upper class) with 'dzu' ending (archaic)",
"v2d-s": "Nidan verb (lower class) with 'dzu' ending (archaic)",
"v2g-k": "Nidan verb (upper class) with 'gu' ending (archaic)",
"v2g-s": "Nidan verb (lower class) with 'gu' ending (archaic)",
"v2h-k": "Nidan verb (upper class) with 'hu/fu' ending (archaic)",
"v2h-s": "Nidan verb (lower class) with 'hu/fu' ending (archaic)",
"v2k-k": "Nidan verb (upper class) with 'ku' ending (archaic)",
"v2k-s": "Nidan verb (lower class) with 'ku' ending (archaic)",
"v2m-k": "Nidan verb (upper class) with 'mu' ending (archaic)",
"v2m-s": "Nidan verb (lower class) with 'mu' ending (archaic)",
"v2n-s": "Nidan verb (lower class) with 'nu' ending (archaic)",
"v2r-k": "Nidan verb (upper class) with 'ru' ending (archaic)",
"v2r-s": "Nidan verb (lower class) with 'ru' ending (archaic)",
"v2s-s": "Nidan verb (lower class) with 'su' ending (archaic)",
"v2t-k": "Nidan verb (upper class) with 'tsu' ending (archaic)",
"v2t-s": "Nidan verb (lower class) with 'tsu' ending (archaic)",
"v2w-s": "Nidan verb (lower class) with 'u' ending and 'we' conjugation (archaic)",
"v2y-k": "Nidan verb (upper class) with 'yu' ending (archaic)",
"v2y-s": "Nidan verb (lower class) with 'yu' ending (archaic)",
"v2z-s": "Nidan verb (lower class) with 'zu' ending (archaic)",
"v4b": "Yodan verb with 'bu' ending (archaic)",
"v4g": "Yodan verb with 'gu' ending (archaic)",
"v4h": "Yodan verb with 'hu/fu' ending (archaic)",
"v4k": "Yodan verb with 'ku' ending (archaic)",
"v4m": "Yodan verb with 'mu' ending (archaic)",
"v4n": "Yodan verb with 'nu' ending (archaic)",
"v4r": "Yodan verb with 'ru' ending (archaic)",
"v4s": "Yodan verb with 'su' ending (archaic)",
"v4t": "Yodan verb with 'tsu' ending (archaic)",
"v5aru": "Godan verb - -aru special class",
"v5b": "Godan verb with 'bu' ending",
"v5g": "Godan verb with 'gu' ending",
"v5k": "Godan verb with 'ku' ending",
"v5k-s": "Godan verb - Iku/Yuku special class",
"v5m": "Godan verb with 'mu' ending",
"v5n": "Godan verb with 'nu' ending",
"v5r": "Godan verb with 'ru' ending",
"v5r-i": "Godan verb with 'ru' ending (irregular verb)",
"v5s": "Godan verb with 'su' ending",
"v5t": "Godan verb with 'tsu' ending",
"v5u": "Godan verb with 'u' ending",
"v5u-s": "Godan verb with 'u' ending (special class)",
"v5uru": "Godan verb - Uru old class verb (old form of Eru)",
"vi": "intransitive verb",
"vk": "Kuru verb - special class",
"vn": "irregular nu verb",
"vr": "irregular ru verb, plain form ends with -ri",
"vs": "noun or participle which takes the aux. verb suru",
"vs-c": "su verb - precursor to the modern suru",
"vs-i": "suru verb - included",
"vs-s": "suru verb - special class",
"vt": "transitive verb",
"vz": "Ichidan verb - zuru verb (alternative form of -jiru verbs)"
},
"re_inf": {
"gikun": "gikun (meaning as reading) or jukujikun (special kanji reading)",
"ik": "word containing irregular kana usage",
"ok": "out-dated or obsolete kana usage",
"uK": "word usually written using kanji alone"
}
}

177
jmdict-enums/src/lib.rs Normal file
View file

@ -0,0 +1,177 @@
/*******************************************************************************
* Copyright 2021 Stefan Majewsky <majewsky@gmx.net>
* SPDX-License-Identifier: Apache-2.0
* Refer to the file "LICENSE" for details.
*******************************************************************************/
//! Autogenerated enums for the `jmdict` crate.
//!
//! This code is in a separate crate because, if we put it in the `jmdict` crate itself, its
//! `build.rs` could not import it.
//!
//! # Compatibility promise
//!
//! **There is none.** This crate can disappear at any time if we choose to restructure the build
//! system for the `jmdict` crate. To use the types from this crate, look at the re-exports of the
//! same name in [the `jmdict` crate](https://docs.rs/jmdict/).
///Error type for all enum conversions of the form `impl TryFrom<AllFoo> for Foo`.
///
///The error is returned for variants from the full enum that are disabled in the main enum because
///of the compile-time configuration. It is a unit struct, so it does not record which variant was
///rejected. For example:
///
///```
///# use jmdict_enums::*;
///use std::convert::TryInto;
///let val: Result<PartOfSpeech, _> = AllPartOfSpeech::NariAdjective.try_into();
///#[cfg(feature = "scope-archaic")]
///assert_eq!(val, Ok(PartOfSpeech::NariAdjective));
///#[cfg(not(feature = "scope-archaic"))]
///assert_eq!(val, Err(DisabledVariant));
///```
#[derive(Clone, Copy, Default, Hash, PartialEq, Eq, Debug)]
pub struct DisabledVariant;
///Internal helper functions for serialization and deserialization of enum values.
///
///This is an internal trait; it is not re-exported by the `jmdict` crate and thus not part of the
///public API.
pub trait EnumPayload {
///Encodes this value into a `u32`.
fn to_u32(&self) -> u32;
///Decodes a value from a `u32`. The implementations in this crate panic when `code` does not
///decode to a valid value.
fn from_u32(code: u32) -> Self;
}
///Common methods provided by all enums in this crate.
pub trait Enum: Sized {
///Returns the string that marks this enum variant in the JMdict. For values that JMdict
///represents as XML entities, only the entity name is returned, e.g. `adj-na` instead of
///`&adj-na;`.
fn code(&self) -> &'static str;
///Parses a representation from the JMdict file into a value of this enum. This is the reverse
///of `self.code()`, i.e. `Self::from_code(self.code()) == Some(self)`. Returns `None` if the
///code does not match any variant of this enum.
fn from_code(code: &str) -> Option<Self>;
///Returns the variant name. This is used to generate Rust code for this enum. The `impl
///Display` for enums uses this same representation.
fn constant_name(&self) -> &'static str;
}
///PriorityInCorpus appears in struct [Priority]. It describes how often a dictionary entry
///appears in a certain corpus of text.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum PriorityInCorpus {
    ///The vocabulary appears often within the given corpus.
    Primary,
    ///The vocabulary appears within the given corpus, but not particularly often.
    Secondary,
    ///The vocabulary does not appear in the given corpus. This is the `Default::default()` value.
    Absent,
}

impl Default for PriorityInCorpus {
    fn default() -> Self {
        PriorityInCorpus::Absent
    }
}

impl PriorityInCorpus {
    ///Returns the number that represents this value inside an encoded `Priority`.
    fn to_repr(&self) -> u32 {
        match self {
            PriorityInCorpus::Absent => 0,
            PriorityInCorpus::Primary => 1,
            PriorityInCorpus::Secondary => 2,
        }
    }

    ///Reverse of `to_repr()`. Panics if no variant is represented by `code`.
    fn from_repr(code: u32) -> Self {
        //look for the variant whose representation is the requested code
        [
            PriorityInCorpus::Absent,
            PriorityInCorpus::Primary,
            PriorityInCorpus::Secondary,
        ]
        .iter()
        .copied()
        .find(|candidate| candidate.to_repr() == code)
        .unwrap_or_else(|| panic!("invalid PriorityInCorpus code: {}", code))
    }
}
///Relative priority of a ReadingElement or KanjiElement.
///
///The various fields indicate if the vocabulary appears in various references, which can be taken
///as an indication of the frequency with which it is used.
///
///For the sake of encoding efficiency, this struct is not a perfect representation of the data in
///the JMdict. Some entries in the JMdict are marked with contradictory priority information. In
///this case, `Priority` will only contain the values corresponding to the highest priority. For
///example, a priority of `ichi1,ichi2,news1,nf09` is represented as:
///
///```
///# use jmdict_enums::{PriorityInCorpus::*, Priority};
///let p = Priority {
/// news: Primary,
/// ichimango: Primary, //"ichi2" gets ignored
/// loanwords: Absent,
/// additional: Absent,
/// frequency_bucket: 9,
///};
///```
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub struct Priority {
///If not `Absent`, this vocabulary appears in the wordfreq file compiled by Alexandre Girardi
///from the Mainichi Shimbun. (A copy of the file can be obtained from the EDRDG.)
pub news: PriorityInCorpus,
///If not `Absent`, this vocabulary appears in the book "1万語語彙分類集" (Ichimango goi
///bunruishuu) by Senmon Kyouiku Publishing, Tokyo, 1998. The entries with priority `Secondary`
///were demoted from `Primary` because they were observed to have low frequencies in the WWW
///and newspapers.
pub ichimango: PriorityInCorpus,
///If not `Absent`, this vocabulary is a common loanword that appears in the wordfreq file.
pub loanwords: PriorityInCorpus,
///This covers a small number of words when they are detected as being common, but are not
///included in the above corpora.
pub additional: PriorityInCorpus,
///If `self.news != Absent`, this field contains a value between 1 and 48, indicating the
///frequency-of-use ranking for this vocabulary in the wordfreq file. The value 1 is used for
///the 500 most common words, the value 2 is used for the 500 next most common words, and so
///on. If `self.news == Absent`, this value will be 0.
pub frequency_bucket: u16,
}
impl Priority {
///Indicates whether this is a common vocabulary. This follows the same logic as the `(P)`
///markers in the EDICT and EDICT2 files: A word is common if any of its `PriorityInCorpus`
///fields is `Primary`, or if `self.additional == Secondary`.
pub fn is_common(&self) -> bool {
use PriorityInCorpus::*;
self.news == Primary
|| self.ichimango == Primary
|| self.loanwords == Primary
|| self.additional != Absent
}
}
//Priority is packed into a u32, same as the enum types. Bits 0-15 hold the frequency bucket.
//Bits 16-31 are split evenly between the four PriorityInCorpus fields, in the order news,
//ichimango, loanwords, additional (four bits each). A denser encoding would be possible, but
//u32 is the smallest encoding unit available to us anyway, so we don't need to bother.
impl EnumPayload for Priority {
    fn to_u32(&self) -> u32 {
        u32::from(self.frequency_bucket)
            | (self.news.to_repr() << 16)
            | (self.ichimango.to_repr() << 20)
            | (self.loanwords.to_repr() << 24)
            | (self.additional.to_repr() << 28)
    }

    fn from_u32(code: u32) -> Self {
        //decodes the four-bit field that starts at the given bit offset
        let corpus_at = |shift: u32| PriorityInCorpus::from_repr((code >> shift) & 0xF);
        Self {
            news: corpus_at(16),
            ichimango: corpus_at(20),
            loanwords: corpus_at(24),
            additional: corpus_at(28),
            frequency_bucket: (code & 0xFFFF) as u16,
        }
    }
}
//Pull in the code that the build script writes to `generated.rs` inside `$OUT_DIR`: the enum
//definitions and their trait implementations.
include!(concat!(env!("OUT_DIR"), "/generated.rs"));