mirror of
https://github.com/ElnuDev/rust-jmdict.git
synced 2025-05-12 01:45:52 -07:00
shave 4 bytes off the Gloss and LoanwordSource representations
This saves 5.29 MiB of payload when compiling with `--features full`. The total payload in that configuration is now 66.72 MiB (breaking down into 30.75 MiB raw text plus 35.97 MiB metadata).
This commit is contained in:
parent
5ceeec3acc
commit
45319ccf60
3 changed files with 23 additions and 20 deletions
19
build.rs
19
build.rs
|
@ -277,7 +277,7 @@ impl ToPayload for jmdict_traverse::RawSense<'_> {
|
||||||
|
|
||||||
impl ToPayload for jmdict_traverse::RawLSource<'_> {
|
impl ToPayload for jmdict_traverse::RawLSource<'_> {
|
||||||
fn size() -> usize {
|
fn size() -> usize {
|
||||||
5
|
4
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode_one(&self, omni: &mut OmniBuffer, buf: &mut [u32]) {
|
fn encode_one(&self, omni: &mut OmniBuffer, buf: &mut [u32]) {
|
||||||
|
@ -287,26 +287,29 @@ impl ToPayload for jmdict_traverse::RawLSource<'_> {
|
||||||
let r = omni.push_str(self.lang);
|
let r = omni.push_str(self.lang);
|
||||||
buf[2] = r.start;
|
buf[2] = r.start;
|
||||||
buf[3] = r.end;
|
buf[3] = r.end;
|
||||||
buf[4] = 0;
|
//`omni.text` is significantly shorter than 2^28 bytes, so we can shove those two booleans
|
||||||
|
//into otherwise unused upper bits (bits 28 and 29) of the text start offset in `buf[0]`
|
||||||
if self.is_partial {
|
if self.is_partial {
|
||||||
buf[4] |= 0x1;
|
buf[0] |= 0x10000000;
|
||||||
}
|
}
|
||||||
if self.is_wasei {
|
if self.is_wasei {
|
||||||
buf[4] |= 0x2;
|
buf[0] |= 0x20000000;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ToPayload for jmdict_traverse::RawGloss<'_> {
|
impl ToPayload for jmdict_traverse::RawGloss<'_> {
|
||||||
fn size() -> usize {
|
fn size() -> usize {
|
||||||
3
|
2
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode_one(&self, omni: &mut OmniBuffer, buf: &mut [u32]) {
|
fn encode_one(&self, omni: &mut OmniBuffer, buf: &mut [u32]) {
|
||||||
|
//`omni.text` is never larger than 30-40 MiB. That's more than 2^24 bytes (16 MiB), but
|
||||||
|
//comfortably below 2^28 bytes. We can therefore use the upper 4 bits of `buf[0]` and
|
||||||
|
//`buf[1]`, respectively, to encode `self.lang` and `self.g_type`.
|
||||||
let r = omni.push_str(self.text);
|
let r = omni.push_str(self.text);
|
||||||
buf[0] = r.start;
|
buf[0] = r.start | (self.lang.to_u32() << 28);
|
||||||
buf[1] = r.end;
|
buf[1] = r.end | (self.g_type.to_u32() << 28);
|
||||||
buf[2] = self.lang.to_u32() | (self.g_type.to_u32() << 16);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -341,9 +341,9 @@ wrap_iterator!(&'static str, 2, Strings);
|
||||||
wrap_iterator!(PartOfSpeech, 1, PartsOfSpeech);
|
wrap_iterator!(PartOfSpeech, 1, PartsOfSpeech);
|
||||||
wrap_iterator!(SenseTopic, 1, SenseTopics);
|
wrap_iterator!(SenseTopic, 1, SenseTopics);
|
||||||
wrap_iterator!(SenseInfo, 1, SenseInfos);
|
wrap_iterator!(SenseInfo, 1, SenseInfos);
|
||||||
wrap_iterator!(LoanwordSource, 5, LoanwordSources);
|
wrap_iterator!(LoanwordSource, 4, LoanwordSources);
|
||||||
wrap_iterator!(Dialect, 1, Dialects);
|
wrap_iterator!(Dialect, 1, Dialects);
|
||||||
wrap_iterator!(Gloss, 3, Glosses);
|
wrap_iterator!(Gloss, 2, Glosses);
|
||||||
|
|
||||||
///An iterator providing fast access to objects in the database. Instances of this iterator
|
///An iterator providing fast access to objects in the database. Instances of this iterator
|
||||||
///can be copied cheaply.
|
///can be copied cheaply.
|
||||||
|
|
|
@ -167,13 +167,13 @@ impl FromPayload<1> for SenseInfo {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FromPayload<5> for LoanwordSource {
|
impl FromPayload<4> for LoanwordSource {
|
||||||
fn get(data: &[u32; 5]) -> Self {
|
fn get(data: &[u32; 4]) -> Self {
|
||||||
Self {
|
Self {
|
||||||
text: get_str(data[0], data[1]),
|
text: get_str(data[0] & 0x0FFFFFFF, data[1]),
|
||||||
language: get_str(data[2], data[3]),
|
language: get_str(data[2], data[3]),
|
||||||
is_partial: (data[4] & 0x1) == 0x1,
|
is_partial: (data[0] & 0x10000000) == 0x10000000,
|
||||||
is_wasei: (data[4] & 0x2) == 0x2,
|
is_wasei: (data[0] & 0x20000000) == 0x20000000,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -184,12 +184,12 @@ impl FromPayload<1> for Dialect {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FromPayload<3> for Gloss {
|
impl FromPayload<2> for Gloss {
|
||||||
fn get(data: &[u32; 3]) -> Self {
|
fn get(data: &[u32; 2]) -> Self {
|
||||||
let lang_code = data[2] & 0x0000FFFF;
|
let lang_code = (data[0] & 0xF0000000) >> 28;
|
||||||
let type_code = (data[2] & 0xFFFF0000) >> 16;
|
let type_code = (data[1] & 0xF0000000) >> 28;
|
||||||
Gloss {
|
Gloss {
|
||||||
text: get_str(data[0], data[1]),
|
text: get_str(data[0] & 0x0FFFFFFF, data[1] & 0x0FFFFFFF),
|
||||||
language: jmdict_enums::EnumPayload::from_u32(lang_code),
|
language: jmdict_enums::EnumPayload::from_u32(lang_code),
|
||||||
gloss_type: jmdict_enums::EnumPayload::from_u32(type_code),
|
gloss_type: jmdict_enums::EnumPayload::from_u32(type_code),
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue