Skip to content

Commit 842acf9

Browse files
committed
Align RLEv1 and RLEv2 structure
1 parent a35cc36 commit 842acf9

File tree

2 files changed

+114
-75
lines changed

2 files changed

+114
-75
lines changed

src/encoding/rle_v1.rs

Lines changed: 78 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,36 @@ use super::{
3030

3131
const MAX_RUN_LENGTH: usize = 130;
3232

33+
/// Sub-encoding of a single RLE v1 group, as selected by its header byte.
// TODO: put header data in here, e.g. base value, len, etc.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum EncodingType {
    /// A run; `length` counts the values that follow the base value
    /// (the base itself is not included).
    Run { length: usize },
    /// A group of `length` individually encoded literal values.
    Literals { length: usize },
}

impl EncodingType {
    /// Classify a group from its header byte, interpreted as a signed value.
    ///
    /// A non-negative header denotes a run, a negative header denotes
    /// literals whose count is the header's absolute value.
    #[inline]
    fn from_header(header: u8) -> Self {
        let signed = header as i8;
        if signed >= 0 {
            // Nominally the run length is header + 3; one is subtracted here
            // because the base value is accounted for separately.
            return Self::Run {
                length: usize::from(header) + 2,
            };
        }
        Self::Literals {
            length: usize::from(signed.unsigned_abs()),
        }
    }
}
56+
3357
/// Decodes a stream of Integer Run Length Encoded version 1 bytes.
3458
pub struct RleReaderV1<N: NInt, R: Read, S: EncodingSign> {
3559
reader: R,
3660
decoded_ints: Vec<N>,
3761
current_head: usize,
38-
phantom: PhantomData<S>,
62+
sign: PhantomData<S>,
3963
}
4064

4165
impl<N: NInt, R: Read, S: EncodingSign> RleReaderV1<N, R, S> {
@@ -44,56 +68,71 @@ impl<N: NInt, R: Read, S: EncodingSign> RleReaderV1<N, R, S> {
4468
reader,
4569
decoded_ints: Vec::with_capacity(MAX_RUN_LENGTH),
4670
current_head: 0,
47-
phantom: Default::default(),
71+
sign: Default::default(),
4872
}
4973
}
5074

5175
fn decode_batch(&mut self) -> Result<()> {
5276
self.current_head = 0;
5377
self.decoded_ints.clear();
54-
match try_read_u8(&mut self.reader)?.map(|byte| byte as i8) {
55-
// Literals
56-
Some(byte) if byte < 0 => {
57-
let length = byte.unsigned_abs();
58-
for _ in 0..length {
59-
let lit = read_varint_zigzagged::<_, _, S>(&mut self.reader)?;
60-
self.decoded_ints.push(lit);
61-
}
62-
Ok(())
78+
let header = match try_read_u8(&mut self.reader)? {
79+
Some(byte) => byte,
80+
None => return Ok(()),
81+
};
82+
83+
match EncodingType::from_header(header) {
84+
EncodingType::Literals { length } => {
85+
read_literals::<_, _, S>(&mut self.reader, &mut self.decoded_ints, length)
6386
}
64-
// Run
65-
Some(byte) => {
66-
let byte = byte as u8;
67-
let length = byte + 2; // Technically +3, but we subtract 1 for the base
68-
let delta = read_u8(&mut self.reader)? as i8;
69-
let mut base = read_varint_zigzagged::<_, _, S>(&mut self.reader)?;
70-
self.decoded_ints.push(base);
71-
if delta < 0 {
72-
let delta = delta.unsigned_abs();
73-
let delta = N::from_u8(delta);
74-
for _ in 0..length {
75-
base = base.checked_sub(&delta).context(OutOfSpecSnafu {
76-
msg: "over/underflow when decoding patched base integer",
77-
})?;
78-
self.decoded_ints.push(base);
79-
}
80-
} else {
81-
let delta = delta as u8;
82-
let delta = N::from_u8(delta);
83-
for _ in 0..length {
84-
base = base.checked_add(&delta).context(OutOfSpecSnafu {
85-
msg: "over/underflow when decoding patched base integer",
86-
})?;
87-
self.decoded_ints.push(base);
88-
}
89-
}
90-
Ok(())
87+
EncodingType::Run { length } => {
88+
read_run::<_, _, S>(&mut self.reader, &mut self.decoded_ints, length)
9189
}
92-
None => Ok(()),
9390
}
9491
}
9592
}
9693

94+
fn read_literals<N: NInt, R: Read, S: EncodingSign>(
95+
reader: &mut R,
96+
out_ints: &mut Vec<N>,
97+
length: usize,
98+
) -> Result<()> {
99+
for _ in 0..length {
100+
let lit = read_varint_zigzagged::<_, _, S>(reader)?;
101+
out_ints.push(lit);
102+
}
103+
Ok(())
104+
}
105+
106+
fn read_run<N: NInt, R: Read, S: EncodingSign>(
107+
reader: &mut R,
108+
out_ints: &mut Vec<N>,
109+
length: usize,
110+
) -> Result<()> {
111+
let delta = read_u8(reader)? as i8;
112+
let mut base = read_varint_zigzagged::<_, _, S>(reader)?;
113+
out_ints.push(base);
114+
if delta < 0 {
115+
let delta = delta.unsigned_abs();
116+
let delta = N::from_u8(delta);
117+
for _ in 0..length {
118+
base = base.checked_sub(&delta).context(OutOfSpecSnafu {
119+
msg: "over/underflow when decoding patched base integer",
120+
})?;
121+
out_ints.push(base);
122+
}
123+
} else {
124+
let delta = delta as u8;
125+
let delta = N::from_u8(delta);
126+
for _ in 0..length {
127+
base = base.checked_add(&delta).context(OutOfSpecSnafu {
128+
msg: "over/underflow when decoding patched base integer",
129+
})?;
130+
out_ints.push(base);
131+
}
132+
}
133+
Ok(())
134+
}
135+
97136
impl<N: NInt, R: Read, S: EncodingSign> PrimitiveValueDecoder<N> for RleReaderV1<N, R, S> {
98137
// TODO: this is an exact duplicate of the RLEv2 version; deduplicate it
99138
fn decode(&mut self, out: &mut [N]) -> Result<()> {

src/encoding/rle_v2/mod.rs

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,47 @@ const SHORT_REPEAT_MIN_LENGTH: usize = 3;
4747
const SHORT_REPEAT_MAX_LENGTH: usize = 10;
4848
const BASE_VALUE_LIMIT: i64 = 1 << 56;
4949

50-
// TODO: switch to read from Bytes directly?
50+
/// Sub-encoding of an RLE v2 group, identified by the two highest bits of
/// its header byte.
// TODO: put header data in here, e.g. base value, len, etc.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum EncodingType {
    /// Header tag `0b00`.
    ShortRepeat,
    /// Header tag `0b01`.
    Direct,
    /// Header tag `0b10`.
    PatchedBase,
    /// Header tag `0b11`.
    Delta,
}

impl EncodingType {
    /// Determine the encoding type from the highest two bits of `header`;
    /// the remaining six bits are ignored.
    #[inline]
    fn from_header(header: u8) -> Self {
        match header >> 6 {
            0b00 => Self::ShortRepeat,
            0b01 => Self::Direct,
            0b10 => Self::PatchedBase,
            0b11 => Self::Delta,
            // A u8 shifted right by six can only be 0..=3.
            _ => unreachable!(),
        }
    }

    /// Return a byte whose highest two bits hold this variant's tag and
    /// whose lower six bits are zero.
    #[inline]
    fn to_header(self) -> u8 {
        let tag: u8 = match self {
            Self::ShortRepeat => 0b00,
            Self::Direct => 0b01,
            Self::PatchedBase => 0b10,
            Self::Delta => 0b11,
        };
        tag << 6
    }
}
83+
5184
pub struct RleReaderV2<N: NInt, R: Read, S: EncodingSign> {
5285
reader: R,
5386
decoded_ints: Vec<N>,
5487
/// Indexes into decoded_ints to make it act like a queue
5588
current_head: usize,
5689
deltas: Vec<i64>,
57-
phantom: PhantomData<S>,
90+
sign: PhantomData<S>,
5891
}
5992

6093
impl<N: NInt, R: Read, S: EncodingSign> RleReaderV2<N, R, S> {
@@ -64,7 +97,7 @@ impl<N: NInt, R: Read, S: EncodingSign> RleReaderV2<N, R, S> {
6497
decoded_ints: Vec::with_capacity(MAX_RUN_LENGTH),
6598
current_head: 0,
6699
deltas: Vec::with_capacity(MAX_RUN_LENGTH),
67-
phantom: Default::default(),
100+
sign: Default::default(),
68101
}
69102
}
70103

@@ -503,39 +536,6 @@ fn determine_variable_run_encoding<N: NInt, S: EncodingSign>(
503536
}
504537
}
505538

506-
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
507-
enum EncodingType {
508-
ShortRepeat,
509-
Direct,
510-
PatchedBase,
511-
Delta,
512-
}
513-
514-
impl EncodingType {
515-
/// Checking highest two bits for encoding type.
516-
#[inline]
517-
fn from_header(header: u8) -> Self {
518-
match header & 0b_1100_0000 {
519-
0b_1100_0000 => Self::Delta,
520-
0b_1000_0000 => Self::PatchedBase,
521-
0b_0100_0000 => Self::Direct,
522-
0b_0000_0000 => Self::ShortRepeat,
523-
_ => unreachable!(),
524-
}
525-
}
526-
527-
/// Return byte with highest two bits set according to variant.
528-
#[inline]
529-
fn to_header(self) -> u8 {
530-
match self {
531-
EncodingType::Delta => 0b_1100_0000,
532-
EncodingType::PatchedBase => 0b_1000_0000,
533-
EncodingType::Direct => 0b_0100_0000,
534-
EncodingType::ShortRepeat => 0b_0000_0000,
535-
}
536-
}
537-
}
538-
539539
#[cfg(test)]
540540
mod tests {
541541

0 commit comments

Comments
 (0)