generated from ElnuDev/rust-project
Compare commits
2 commits
30ee7577eb
...
c2b2147e04
Author | SHA1 | Date | |
---|---|---|---|
c2b2147e04 | |||
2f54f16864 |
6 changed files with 58 additions and 34 deletions
11
Cargo.lock
generated
11
Cargo.lock
generated
|
@ -32,6 +32,15 @@ version = "1.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "bincode"
|
||||
version = "1.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
|
@ -136,6 +145,7 @@ dependencies = [
|
|||
"codes-agency",
|
||||
"codes-common",
|
||||
"csv",
|
||||
"serde",
|
||||
"tera",
|
||||
]
|
||||
|
||||
|
@ -315,6 +325,7 @@ version = "0.1.0"
|
|||
name = "jichanorg-jmdict"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"codes-iso-639",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
|
1
jmdict/.gitignore
vendored
Normal file
1
jmdict/.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
JMdict*.bin
|
|
@ -6,6 +6,7 @@ edition = "2021"
|
|||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
bincode = "1.3.3"
|
||||
serde = { version = "1.0.175", features = ["serde_derive"] }
|
||||
serde_json = "1.0.103"
|
||||
|
||||
|
@ -13,5 +14,4 @@ serde_json = "1.0.103"
|
|||
git = "https://github.com/ElnuDev/rust-codes.git"
|
||||
rev = "bea9e74d0ec2aaec0b74c2fa6f8da0490a638090"
|
||||
version = "0.1.5"
|
||||
default-features = false # no serde
|
||||
features = ["part_2"]
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
use codes_iso_639::part_2::LanguageCode;
|
||||
use serde::Deserialize;
|
||||
use serde::{Serialize, Deserialize};
|
||||
|
||||
use crate::serial::*;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)]
|
||||
pub struct Jmdict {
|
||||
pub entries: Vec<JmdictEntry>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)]
|
||||
pub struct JmdictEntry {
|
||||
pub sequence: i32,
|
||||
|
@ -19,14 +19,16 @@ pub struct JmdictEntry {
|
|||
pub sense: Vec<JmdictSense>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)]
|
||||
pub struct JmdictExample {
|
||||
pub srce: JmdictExampleSource,
|
||||
// Always empty with latest version of JMdict
|
||||
#[serde(default, skip_serializing, deserialize_with = "err_if_not_blank")]
|
||||
srcaswdfa: serde::de::IgnoredAny,
|
||||
pub sentences: Vec<JmdictExampleSentence>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)]
|
||||
pub struct JmdictExampleSentence {
|
||||
#[serde(deserialize_with = "lang")]
|
||||
|
@ -34,33 +36,26 @@ pub struct JmdictExampleSentence {
|
|||
pub text: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)]
|
||||
pub struct JmdictExampleSource {
|
||||
pub id: String,
|
||||
pub src_type: JmdictExampleSourceSrcType,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)]
|
||||
pub enum JmdictExampleSourceSrcType {
|
||||
Amogus,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)]
|
||||
pub struct JmdictGlossary {
|
||||
pub content: String,
|
||||
#[serde(deserialize_with = "lang_default_eng")]
|
||||
pub language: LanguageCode,
|
||||
// Always empty with latest version of JMdict
|
||||
#[serde(default, skip_serializing, deserialize_with = "err_if_not_null")]
|
||||
#[serde(default, skip_serializing, deserialize_with = "err_if_not_blank")]
|
||||
gender: serde::de::IgnoredAny,
|
||||
pub r#type: Option<JmdictGlossaryType>,
|
||||
}
|
||||
|
||||
// https://github.com/FooSoft/yomichan/issues/2057
|
||||
#[derive(Deserialize)]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub enum JmdictGlossaryType {
|
||||
#[serde(rename = "lit")]
|
||||
Literal,
|
||||
|
@ -72,7 +67,7 @@ pub enum JmdictGlossaryType {
|
|||
Trademark,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)]
|
||||
pub struct JmdictKanji {
|
||||
pub expression: String,
|
||||
|
@ -82,12 +77,12 @@ pub struct JmdictKanji {
|
|||
pub priorities: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)]
|
||||
pub struct JmdictReading {
|
||||
pub reading: String,
|
||||
// Always empty with latest version of JMdict
|
||||
#[serde(default, skip_serializing, deserialize_with = "err_if_not_null")]
|
||||
#[serde(default, skip_serializing, deserialize_with = "err_if_not_blank")]
|
||||
no_kanji: serde::de::IgnoredAny,
|
||||
#[serde(deserialize_with = "nullable_list")]
|
||||
pub restrictions: Vec<String>,
|
||||
|
@ -97,7 +92,7 @@ pub struct JmdictReading {
|
|||
pub priorities: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)]
|
||||
pub struct JmdictSense {
|
||||
#[serde(deserialize_with = "nullable_list")]
|
||||
|
@ -126,7 +121,7 @@ pub struct JmdictSense {
|
|||
pub examples: Vec<JmdictExample>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(rename_all(deserialize = "PascalCase"), deny_unknown_fields)]
|
||||
pub struct JmdictSource {
|
||||
pub content: String,
|
||||
|
|
|
@ -84,13 +84,14 @@ where
|
|||
.map_err(|err| serde::de::Error::custom(err))
|
||||
}
|
||||
|
||||
pub fn err_if_not_null<'de, D>(deserializer: D) -> Result<serde::de::IgnoredAny, D::Error>
|
||||
pub fn err_if_not_blank<'de, D>(deserializer: D) -> Result<serde::de::IgnoredAny, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
match serde_json::Value::deserialize(deserializer)? {
|
||||
Value::Null => Ok(serde::de::IgnoredAny),
|
||||
Value::String(str) if str.is_empty() => Ok(serde::de::IgnoredAny),
|
||||
_ => Err(serde::de::Error::custom("Expected null or empty string!")),
|
||||
let value = serde_json::Value::deserialize(deserializer)?;
|
||||
if value.is_null() || value == "" {
|
||||
Ok(serde::de::IgnoredAny)
|
||||
} else {
|
||||
Err(serde::de::Error::custom("Expected null or empty string!"))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
use std::fs;
|
||||
use std::{fs::{self, File}, io::{Write, Read}};
|
||||
|
||||
use crate::jmdict::Jmdict;
|
||||
|
||||
#[test]
|
||||
fn jmdict() {
|
||||
let contents = fs::read_dir("loader")
|
||||
fn get_jmdict_json() -> String {
|
||||
fs::read_dir("loader")
|
||||
.unwrap()
|
||||
.filter_map(|file| {
|
||||
let file = file.unwrap();
|
||||
|
@ -16,6 +15,23 @@ fn jmdict() {
|
|||
})
|
||||
.next()
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
let _jmdict: Jmdict = serde_json::from_str(&contents).unwrap();
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
const BIN_PATH: &str = "JMdict.bin";
|
||||
|
||||
#[test]
|
||||
fn jmdict_load_and_serialize() {
|
||||
let jmdict: Jmdict = serde_json::from_str(&get_jmdict_json()).unwrap();
|
||||
let encoded: Vec<u8> = bincode::serialize(&jmdict).unwrap();
|
||||
let mut file = File::create(BIN_PATH).unwrap();
|
||||
file.write_all(&encoded).unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jmdict_deserialize() {
|
||||
let mut file = File::open(BIN_PATH).unwrap();
|
||||
let mut buffer = Vec::new();
|
||||
file.read_to_end(&mut buffer).unwrap();
|
||||
let _jmdict: Jmdict = bincode::deserialize(&buffer).unwrap();
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue