shave 4 bytes off the Gloss and LoanwordSource representations

This saves 5.29 MiB of payload when compiling with `--features full`.
The total payload in that configuration is now 66.72 MiB
(breaking down into 30.75 MiB raw text plus 35.97 MiB metadata).
main
Stefan Majewsky 3 years ago
parent 5ceeec3acc
commit 45319ccf60

@ -277,7 +277,7 @@ impl ToPayload for jmdict_traverse::RawSense<'_> {
impl ToPayload for jmdict_traverse::RawLSource<'_> {
fn size() -> usize {
5
4
}
fn encode_one(&self, omni: &mut OmniBuffer, buf: &mut [u32]) {
@ -287,26 +287,29 @@ impl ToPayload for jmdict_traverse::RawLSource<'_> {
let r = omni.push_str(self.lang);
buf[2] = r.start;
buf[3] = r.end;
buf[4] = 0;
//`omni.text` is significantly shorter than 2^28 bytes, so the upper 4 bits of every offset
//value are always zero; we can shove those two booleans into bits 28 and 29 of `buf[0]`
if self.is_partial {
buf[4] |= 0x1;
buf[0] |= 0x10000000;
}
if self.is_wasei {
buf[4] |= 0x2;
buf[0] |= 0x20000000;
}
}
}
impl ToPayload for jmdict_traverse::RawGloss<'_> {
fn size() -> usize {
3
2
}
fn encode_one(&self, omni: &mut OmniBuffer, buf: &mut [u32]) {
//`omni.text` is never larger than 30-40 MiB. That's more than 2^24 bytes (16 MiB), but
//comfortably below 2^28 bytes (256 MiB). We can therefore use the upper 4 bits of `buf[0]` and
//`buf[1]`, respectively, to encode `self.lang` and `self.g_type` (each must fit in 4 bits).
let r = omni.push_str(self.text);
buf[0] = r.start;
buf[1] = r.end;
buf[2] = self.lang.to_u32() | (self.g_type.to_u32() << 16);
buf[0] = r.start | (self.lang.to_u32() << 28);
buf[1] = r.end | (self.g_type.to_u32() << 28);
}
}

@ -341,9 +341,9 @@ wrap_iterator!(&'static str, 2, Strings);
wrap_iterator!(PartOfSpeech, 1, PartsOfSpeech);
wrap_iterator!(SenseTopic, 1, SenseTopics);
wrap_iterator!(SenseInfo, 1, SenseInfos);
wrap_iterator!(LoanwordSource, 5, LoanwordSources);
wrap_iterator!(LoanwordSource, 4, LoanwordSources);
wrap_iterator!(Dialect, 1, Dialects);
wrap_iterator!(Gloss, 3, Glosses);
wrap_iterator!(Gloss, 2, Glosses);
///An iterator providing fast access to objects in the database. Instances of this iterator
///can be copied cheaply.

@ -167,13 +167,13 @@ impl FromPayload<1> for SenseInfo {
}
}
impl FromPayload<5> for LoanwordSource {
fn get(data: &[u32; 5]) -> Self {
impl FromPayload<4> for LoanwordSource {
fn get(data: &[u32; 4]) -> Self {
Self {
text: get_str(data[0], data[1]),
text: get_str(data[0] & 0x0FFFFFFF, data[1]),
language: get_str(data[2], data[3]),
is_partial: (data[4] & 0x1) == 0x1,
is_wasei: (data[4] & 0x2) == 0x2,
is_partial: (data[0] & 0x10000000) == 0x10000000,
is_wasei: (data[0] & 0x20000000) == 0x20000000,
}
}
}
@ -184,12 +184,12 @@ impl FromPayload<1> for Dialect {
}
}
impl FromPayload<3> for Gloss {
fn get(data: &[u32; 3]) -> Self {
let lang_code = data[2] & 0x0000FFFF;
let type_code = (data[2] & 0xFFFF0000) >> 16;
impl FromPayload<2> for Gloss {
fn get(data: &[u32; 2]) -> Self {
let lang_code = (data[0] & 0xF0000000) >> 28;
let type_code = (data[1] & 0xF0000000) >> 28;
Gloss {
text: get_str(data[0], data[1]),
text: get_str(data[0] & 0x0FFFFFFF, data[1] & 0x0FFFFFFF),
language: jmdict_enums::EnumPayload::from_u32(lang_code),
gloss_type: jmdict_enums::EnumPayload::from_u32(type_code),
}

Loading…
Cancel
Save