optional memcmp, last-superseq optimizations
This commit is contained in:
+5
-3
@@ -15,8 +15,8 @@ fn main() {
|
|||||||
let header = &rply.header;
|
let header = &rply.header;
|
||||||
println!("{header:?}");
|
println!("{header:?}");
|
||||||
let mut header_out = header.clone();
|
let mut header_out = header.clone();
|
||||||
header_out.set_block_size(64);
|
header_out.set_block_size(128);
|
||||||
header_out.set_superblock_size(32);
|
header_out.set_superblock_size(128);
|
||||||
let mut out = encode(header_out, &rply.initial_state, &mut outfile).unwrap();
|
let mut out = encode(header_out, &rply.initial_state, &mut outfile).unwrap();
|
||||||
let mut frame = Frame::default();
|
let mut frame = Frame::default();
|
||||||
while let Ok(()) = rply
|
while let Ok(()) = rply
|
||||||
@@ -47,7 +47,7 @@ fn main() {
|
|||||||
Timer::DecodeFrame,
|
Timer::DecodeFrame,
|
||||||
Timer::DecodeCheckpoint,
|
Timer::DecodeCheckpoint,
|
||||||
Timer::DecodeStatestream,
|
Timer::DecodeStatestream,
|
||||||
Timer::EncodeStatestream,
|
Timer::EncodeFrame,
|
||||||
Timer::EncodeCheckpoint,
|
Timer::EncodeCheckpoint,
|
||||||
Timer::EncodeStatestream,
|
Timer::EncodeStatestream,
|
||||||
] {
|
] {
|
||||||
@@ -57,6 +57,8 @@ fn main() {
|
|||||||
println!("{timer:?}: {} ({avg_time:.8}ms avg)", times.count,);
|
println!("{timer:?}: {} ({avg_time:.8}ms avg)", times.count,);
|
||||||
}
|
}
|
||||||
for counter in [
|
for counter in [
|
||||||
|
Counter::DecSkippedSuperblocks,
|
||||||
|
Counter::DecSkippedBlocks,
|
||||||
Counter::EncReusedBlocks,
|
Counter::EncReusedBlocks,
|
||||||
Counter::EncReusedSuperblocks,
|
Counter::EncReusedSuperblocks,
|
||||||
Counter::EncSkippedBlocks,
|
Counter::EncSkippedBlocks,
|
||||||
|
|||||||
@@ -23,6 +23,8 @@ pub enum Counter {
|
|||||||
EncTotalSuperblocks,
|
EncTotalSuperblocks,
|
||||||
EncTotalKBsIn,
|
EncTotalKBsIn,
|
||||||
EncTotalKBsOut,
|
EncTotalKBsOut,
|
||||||
|
DecSkippedSuperblocks,
|
||||||
|
DecSkippedBlocks,
|
||||||
Count,
|
Count,
|
||||||
}
|
}
|
||||||
static TIME_ACC: [AtomicU64; Timer::Count as usize] = [
|
static TIME_ACC: [AtomicU64; Timer::Count as usize] = [
|
||||||
@@ -51,6 +53,8 @@ static COUNTS: [AtomicU64; Counter::Count as usize] = [
|
|||||||
AtomicU64::new(0),
|
AtomicU64::new(0),
|
||||||
AtomicU64::new(0),
|
AtomicU64::new(0),
|
||||||
AtomicU64::new(0),
|
AtomicU64::new(0),
|
||||||
|
AtomicU64::new(0),
|
||||||
|
AtomicU64::new(0),
|
||||||
];
|
];
|
||||||
|
|
||||||
pub struct Stopwatch(Timer, std::time::Instant);
|
pub struct Stopwatch(Timer, std::time::Instant);
|
||||||
|
|||||||
+62
-3
@@ -47,6 +47,7 @@ pub(crate) struct Ctx {
|
|||||||
last_superseq: Vec<u32>,
|
last_superseq: Vec<u32>,
|
||||||
block_index: BlockIndex<u8>,
|
block_index: BlockIndex<u8>,
|
||||||
superblock_index: BlockIndex<u32>,
|
superblock_index: BlockIndex<u32>,
|
||||||
|
use_encode_state_comparisons: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Ctx {
|
impl Ctx {
|
||||||
@@ -58,6 +59,7 @@ impl Ctx {
|
|||||||
last_superseq: vec![],
|
last_superseq: vec![],
|
||||||
block_index: BlockIndex::new(block_size as usize),
|
block_index: BlockIndex::new(block_size as usize),
|
||||||
superblock_index: BlockIndex::new(superblock_size as usize),
|
superblock_index: BlockIndex::new(superblock_size as usize),
|
||||||
|
use_encode_state_comparisons: true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -119,6 +121,7 @@ impl<R: std::io::Read> std::io::Read for Decoder<'_, '_, R> {
|
|||||||
/* a slightly degenerate read implementation in that it will keep
|
/* a slightly degenerate read implementation in that it will keep
|
||||||
* calling read on the inner reader until a complete checkpoint is
|
* calling read on the inner reader until a complete checkpoint is
|
||||||
* read, then return 0 for subsequent reads */
|
* read, then return 0 for subsequent reads */
|
||||||
|
#[allow(clippy::too_many_lines)]
|
||||||
fn read(&mut self, outbuf: &mut [u8]) -> std::io::Result<usize> {
|
fn read(&mut self, outbuf: &mut [u8]) -> std::io::Result<usize> {
|
||||||
use ParseState as State;
|
use ParseState as State;
|
||||||
use rmp::decode as r;
|
use rmp::decode as r;
|
||||||
@@ -185,16 +188,38 @@ impl<R: std::io::Read> std::io::Read for Decoder<'_, '_, R> {
|
|||||||
(State::WaitForSuperblockSeq, SSToken::SuperblockSeq) => {
|
(State::WaitForSuperblockSeq, SSToken::SuperblockSeq) => {
|
||||||
let arr_len =
|
let arr_len =
|
||||||
r::read_array_len(self.reader).map_err(std::io::Error::other)? as usize;
|
r::read_array_len(self.reader).map_err(std::io::Error::other)? as usize;
|
||||||
|
let last_state_valid = self.ctx.last_superseq.len() >= arr_len
|
||||||
|
&& self.ctx.last_state.len() >= self.state_size;
|
||||||
let block_byte_size = self.ctx.block_size as usize;
|
let block_byte_size = self.ctx.block_size as usize;
|
||||||
let superblock_byte_size = self.ctx.superblock_size as usize * block_byte_size;
|
let superblock_byte_size = self.ctx.superblock_size as usize * block_byte_size;
|
||||||
let mut superseq = vec![0; arr_len];
|
let mut superseq = vec![0; arr_len];
|
||||||
self.ctx.last_state.resize(self.state_size, 0);
|
self.ctx.last_state.resize(self.state_size, 0);
|
||||||
|
let mut skipped_superblocks = 0;
|
||||||
|
let mut skipped_blocks = 0;
|
||||||
for (superblock_i, superseq_sblk) in superseq.iter_mut().enumerate() {
|
for (superblock_i, superseq_sblk) in superseq.iter_mut().enumerate() {
|
||||||
let superblock_idx =
|
let superblock_idx =
|
||||||
r::read_int(self.reader).map_err(std::io::Error::other)?;
|
r::read_int(self.reader).map_err(std::io::Error::other)?;
|
||||||
*superseq_sblk = superblock_idx;
|
*superseq_sblk = superblock_idx;
|
||||||
|
if last_state_valid
|
||||||
|
&& self.ctx.last_superseq[superblock_i] == superblock_idx
|
||||||
|
{
|
||||||
|
// no need to copy bytes
|
||||||
|
skipped_superblocks += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
let superblock_data = self.ctx.superblock_index.get(superblock_idx);
|
let superblock_data = self.ctx.superblock_index.get(superblock_idx);
|
||||||
for (block_i, block_id) in superblock_data.iter().copied().enumerate() {
|
for (block_i, block_id) in superblock_data.iter().copied().enumerate() {
|
||||||
|
if last_state_valid
|
||||||
|
&& self
|
||||||
|
.ctx
|
||||||
|
.superblock_index
|
||||||
|
.get(self.ctx.last_superseq[superblock_i])[block_i]
|
||||||
|
== block_id
|
||||||
|
{
|
||||||
|
// no need to copy bytes
|
||||||
|
skipped_blocks += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
let block_start = (superblock_i * superblock_byte_size
|
let block_start = (superblock_i * superblock_byte_size
|
||||||
+ block_i * block_byte_size)
|
+ block_i * block_byte_size)
|
||||||
.min(self.state_size);
|
.min(self.state_size);
|
||||||
@@ -208,6 +233,8 @@ impl<R: std::io::Read> std::io::Read for Decoder<'_, '_, R> {
|
|||||||
.copy_from_slice(&block_bytes[0..(block_end - block_start)]);
|
.copy_from_slice(&block_bytes[0..(block_end - block_start)]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
clock::count(Counter::DecSkippedSuperblocks, skipped_superblocks);
|
||||||
|
clock::count(Counter::DecSkippedBlocks, skipped_blocks);
|
||||||
self.ctx.last_superseq = superseq;
|
self.ctx.last_superseq = superseq;
|
||||||
state = State::Finished;
|
state = State::Finished;
|
||||||
self.finished = true;
|
self.finished = true;
|
||||||
@@ -274,20 +301,50 @@ impl<'w, 'c, W: std::io::Write> Encoder<'w, 'c, W> {
|
|||||||
let mut reused_blocks = 0;
|
let mut reused_blocks = 0;
|
||||||
let mut reused_superblocks = 0;
|
let mut reused_superblocks = 0;
|
||||||
let mut hashes = 0;
|
let mut hashes = 0;
|
||||||
|
let mut skipped_blocks = 0;
|
||||||
|
let mut memcmps = 0;
|
||||||
self.ctx
|
self.ctx
|
||||||
.last_superseq
|
.last_superseq
|
||||||
.resize(superblock_count.max(self.ctx.last_superseq.len()), 0);
|
.resize(superblock_count.max(self.ctx.last_superseq.len()), 0);
|
||||||
let mut superblock_contents = vec![0_u32; superblock_size];
|
let mut superblock_contents = vec![0_u32; superblock_size];
|
||||||
for (superblock_i, superblock_bytes) in checkpoint.chunks(superblock_size_bytes).enumerate()
|
let can_compare_saves = if self.ctx.last_state.len() < checkpoint.len() {
|
||||||
|
self.ctx.last_state.clear();
|
||||||
|
self.ctx.last_state.extend_from_slice(checkpoint);
|
||||||
|
false
|
||||||
|
} else {
|
||||||
|
self.ctx.last_state.truncate(checkpoint.len());
|
||||||
|
self.ctx.use_encode_state_comparisons
|
||||||
|
};
|
||||||
|
for (superblock_i, (superblock_bytes, last_state_superblock_bytes)) in (checkpoint
|
||||||
|
.chunks(superblock_size_bytes)
|
||||||
|
.zip(self.ctx.last_state.chunks(superblock_size_bytes)))
|
||||||
|
.enumerate()
|
||||||
{
|
{
|
||||||
|
/* maybe: skip superblocks */
|
||||||
if superblock_bytes.len() < superblock_size_bytes {
|
if superblock_bytes.len() < superblock_size_bytes {
|
||||||
let block_count = (superblock_bytes.len() - 1) / block_size + 1;
|
let block_count = (superblock_bytes.len() - 1) / block_size + 1;
|
||||||
if block_count + 1 < superblock_size {
|
if block_count + 1 < superblock_size {
|
||||||
superblock_contents[(block_count + 1)..].fill(0);
|
superblock_contents[(block_count + 1)..].fill(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (block_i, block_bytes) in superblock_bytes.chunks(block_size).enumerate() {
|
for (block_i, (block_bytes, last_state_block_bytes)) in (superblock_bytes
|
||||||
let found_block = if block_bytes.len() < block_size {
|
.chunks(block_size)
|
||||||
|
.zip(last_state_superblock_bytes.chunks(block_size)))
|
||||||
|
.enumerate()
|
||||||
|
{
|
||||||
|
memcmps += u64::from(can_compare_saves);
|
||||||
|
let found_block = if can_compare_saves
|
||||||
|
&& block_bytes[..] == last_state_block_bytes[..block_bytes.len()]
|
||||||
|
{
|
||||||
|
skipped_blocks += 1;
|
||||||
|
blockindex::Insertion {
|
||||||
|
index: self
|
||||||
|
.ctx
|
||||||
|
.superblock_index
|
||||||
|
.get(self.ctx.last_superseq[superblock_i])[block_i],
|
||||||
|
is_new: false,
|
||||||
|
}
|
||||||
|
} else if block_bytes.len() < block_size {
|
||||||
padded_block[block_bytes.len()..].fill(0);
|
padded_block[block_bytes.len()..].fill(0);
|
||||||
padded_block[..block_bytes.len()].copy_from_slice(block_bytes);
|
padded_block[..block_bytes.len()].copy_from_slice(block_bytes);
|
||||||
hashes += 1;
|
hashes += 1;
|
||||||
@@ -337,6 +394,8 @@ impl<'w, 'c, W: std::io::Write> Encoder<'w, 'c, W> {
|
|||||||
}
|
}
|
||||||
clock::count(Counter::EncReusedBlocks, reused_blocks);
|
clock::count(Counter::EncReusedBlocks, reused_blocks);
|
||||||
clock::count(Counter::EncReusedSuperblocks, reused_superblocks);
|
clock::count(Counter::EncReusedSuperblocks, reused_superblocks);
|
||||||
|
clock::count(Counter::EncSkippedBlocks, skipped_blocks);
|
||||||
|
clock::count(Counter::EncMemCmps, memcmps);
|
||||||
clock::count(Counter::EncHashes, hashes);
|
clock::count(Counter::EncHashes, hashes);
|
||||||
self.ctx.last_superseq.truncate(superblock_count);
|
self.ctx.last_superseq.truncate(superblock_count);
|
||||||
bytes_out += rmp_size(r::write_uint(
|
bytes_out += rmp_size(r::write_uint(
|
||||||
|
|||||||
Reference in New Issue
Block a user