iroh_blobs/store/util/
sparse_mem_file.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
use std::{io, ops::Deref};

use bao_tree::io::{
    mixed::ReadBytesAt,
    sync::{ReadAt, Size, WriteAt},
};
use bytes::Bytes;
use range_collections::{range_set::RangeSetRange, RangeSet2};

/// An in-memory file that remembers which byte ranges were actually written.
///
/// The backing buffer is dense: to keep reads fast, no sparse storage is used,
/// so gaps still occupy memory (filled with zeros). The written ranges are
/// tracked separately, purely so that the contents can later be written to a
/// real file in a sparse way.
#[derive(derive_more::Debug)]
pub struct SparseMemFile {
    /// The full contents, with gaps filled with zeros.
    ///
    /// Only the length is shown in `Debug` output.
    #[debug("{} bytes", data.len())]
    data: Vec<u8>,
    /// The byte ranges that have been written, so that zeros that were
    /// written explicitly can be told apart from gaps.
    ranges: RangeSet2<usize>,
}

impl Default for SparseMemFile {
    fn default() -> Self {
        Self::new()
    }
}

impl From<Vec<u8>> for SparseMemFile {
    /// Wrap an existing buffer, marking the whole span `0..data.len()` as
    /// written.
    fn from(data: Vec<u8>) -> Self {
        let len = data.len();
        Self {
            ranges: RangeSet2::from(0..len),
            data,
        }
    }
}

impl TryFrom<SparseMemFile> for Vec<u8> {
    type Error = io::Error;

    fn try_from(value: SparseMemFile) -> Result<Self, Self::Error> {
        let (data, ranges) = value.into_parts();
        if ranges == RangeSet2::from(0..data.len()) {
            Ok(data)
        } else {
            Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "SparseMemFile has gaps",
            ))
        }
    }
}

impl SparseMemFile {
    /// Create a new, empty `SparseMemFile` with no data and no written ranges.
    pub fn new() -> Self {
        Self {
            data: Vec::new(),
            ranges: RangeSet2::empty(),
        }
    }

    /// Split into the backing buffer and the set of ranges that were written.
    pub fn into_parts(self) -> (Vec<u8>, RangeSet2<usize>) {
        (self.data, self.ranges)
    }

    /// Persist the `SparseMemFile` to a [`WriteAt`].
    ///
    /// Only the ranges that were written are copied to `target`, each at its
    /// original offset; gaps are skipped entirely.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by `target`. Each range is written
    /// with `write_all_at`, so a target that accepts only part of a buffer
    /// per call is retried until the whole range is stored rather than
    /// leaving it silently truncated.
    pub fn persist(&self, mut target: impl WriteAt) -> io::Result<()> {
        let size = self.data.len();
        for range in self.ranges.iter() {
            let range = match range {
                RangeSetRange::Range(range) => *range.start..*range.end,
                // An open-ended range is clamped to the end of the buffer.
                RangeSetRange::RangeFrom(range) => *range.start..size,
            };
            let start = u64::try_from(range.start)
                .map_err(|_| io::Error::from(io::ErrorKind::InvalidInput))?;
            // `write_at` may perform a short write; `write_all_at` does not.
            target.write_all_at(start, &self.data[range])?;
        }
        Ok(())
    }
}

impl AsRef<[u8]> for SparseMemFile {
    /// View the whole backing buffer, gaps included, as a byte slice.
    fn as_ref(&self) -> &[u8] {
        self.data.as_slice()
    }
}

impl Deref for SparseMemFile {
    type Target = [u8];

    /// Dereference to the whole backing buffer, gaps included.
    fn deref(&self) -> &Self::Target {
        self.data.as_slice()
    }
}

impl ReadAt for SparseMemFile {
    /// Read from the backing buffer at `offset`.
    ///
    /// The written ranges are not consulted, so reads that touch a gap are
    /// served from whatever the buffer holds there.
    fn read_at(&self, offset: u64, buf: &mut [u8]) -> io::Result<usize> {
        let bytes_read = self.data.read_at(offset, buf)?;
        Ok(bytes_read)
    }
}

impl ReadBytesAt for SparseMemFile {
    /// Read `size` bytes at `offset` from the backing buffer as [`Bytes`].
    ///
    /// Delegates to the backing buffer; the written ranges are not consulted.
    fn read_bytes_at(&self, offset: u64, size: usize) -> io::Result<Bytes> {
        let chunk = self.data.read_bytes_at(offset, size)?;
        Ok(chunk)
    }
}

impl WriteAt for SparseMemFile {
    /// Write `buf` into the backing buffer at `offset` and record the span
    /// `offset..offset + buf.len()` as written.
    ///
    /// # Errors
    ///
    /// Returns [`io::ErrorKind::InvalidInput`] if `offset` does not fit in a
    /// `usize` or the end of the span overflows. Both checks happen before
    /// the buffer is touched. Errors from the underlying write are passed on.
    fn write_at(&mut self, offset: u64, buf: &[u8]) -> io::Result<usize> {
        let invalid_input = || io::Error::from(io::ErrorKind::InvalidInput);
        let start = usize::try_from(offset).map_err(|_| invalid_input())?;
        let end = start.checked_add(buf.len()).ok_or_else(invalid_input)?;
        let written = self.data.write_at(offset, buf)?;
        self.ranges |= RangeSet2::from(start..end);
        Ok(written)
    }

    /// Always succeeds without doing any work.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}

impl Size for SparseMemFile {
    /// Report the length of the backing buffer in bytes, gaps included.
    ///
    /// The size is always known, so this never returns `None` or an error.
    fn size(&self) -> io::Result<Option<u64>> {
        let len = self.data.len() as u64;
        Ok(Some(len))
    }
}