iroh_blobs/format/
collection.rs

1//! The collection type used by iroh
2use std::{collections::BTreeMap, future::Future};
3
4// n0_error::Context is no longer exported; use explicit mapping instead.
5use bao_tree::blake3;
6use bytes::Bytes;
7use n0_error::{Result, StdResultExt};
8use serde::{Deserialize, Serialize};
9
10use crate::{
11    api::{blobs::AddBytesOptions, Store},
12    get::{fsm, Stats},
13    hashseq::HashSeq,
14    util::temp_tag::TempTag,
15    BlobFormat, Hash,
16};
17
18/// A collection of blobs
19///
20/// Note that the format is subject to change.
21#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, Default)]
22pub struct Collection {
23    /// Links to the blobs in this collection
24    blobs: Vec<(String, Hash)>,
25}
26
27impl std::ops::Index<usize> for Collection {
28    type Output = (String, Hash);
29
30    fn index(&self, index: usize) -> &Self::Output {
31        &self.blobs[index]
32    }
33}
34
35impl<K, V> Extend<(K, V)> for Collection
36where
37    K: Into<String>,
38    V: Into<Hash>,
39{
40    fn extend<T: IntoIterator<Item = (K, V)>>(&mut self, iter: T) {
41        self.blobs
42            .extend(iter.into_iter().map(|(k, v)| (k.into(), v.into())));
43    }
44}
45
46impl<K, V> FromIterator<(K, V)> for Collection
47where
48    K: Into<String>,
49    V: Into<Hash>,
50{
51    fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> Self {
52        let mut res = Self::default();
53        res.extend(iter);
54        res
55    }
56}
57
58impl IntoIterator for Collection {
59    type Item = (String, Hash);
60    type IntoIter = std::vec::IntoIter<Self::Item>;
61
62    fn into_iter(self) -> Self::IntoIter {
63        self.blobs.into_iter()
64    }
65}
66
/// A simple store trait for loading blobs
///
/// This is the only capability [`Collection::load`] needs from a store: given
/// a hash, asynchronously produce the bytes of the corresponding blob.
pub trait SimpleStore {
    /// Load a blob from the store
    ///
    /// Resolves to the content of the blob identified by `hash`, or to an
    /// error if the store cannot provide it. The returned future is `Send`
    /// and borrows `self`.
    fn load(&self, hash: Hash) -> impl Future<Output = Result<Bytes>> + Send + '_;
}
72
73impl SimpleStore for crate::api::Store {
74    async fn load(&self, hash: Hash) -> Result<Bytes> {
75        Ok(self.get_bytes(hash).await?)
76    }
77}
78
79/// Metadata for a collection
80///
81/// This is the wire format for the metadata blob.
82#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
83pub struct CollectionMeta {
84    header: [u8; 13], // Must contain "CollectionV0."
85    names: Vec<String>,
86}
87
88impl Default for CollectionMeta {
89    fn default() -> Self {
90        Self {
91            header: *Collection::HEADER,
92            names: Vec::default(),
93        }
94    }
95}
96
97impl CollectionMeta {
98    /// Verifies the header is the only valid collection header.
99    pub fn check_header(&self) -> bool {
100        &self.header == Collection::HEADER
101    }
102
103    /// Returns the names in this collection.
104    pub fn names(&self) -> &[String] {
105        &self.names
106    }
107
108    /// Returns a mutable reference to the names in this collection.
109    pub fn names_mut(&mut self) -> &mut Vec<String> {
110        &mut self.names
111    }
112}
113
114impl Collection {
    /// The header for the collection format.
    ///
    /// This is the start of the metadata blob. It is 13 bytes long and is
    /// the value written into, and expected in, the `header` field of
    /// [`CollectionMeta`].
    pub const HEADER: &'static [u8; 13] = b"CollectionV0.";
119
120    /// Convert the collection to an iterator of blobs, with the last being the
121    /// root blob.
122    ///
123    /// To persist the collection, write all the blobs to storage, and use the
124    /// hash of the last blob as the collection hash.
125    pub fn to_blobs(&self) -> impl DoubleEndedIterator<Item = Bytes> {
126        let meta = CollectionMeta {
127            header: *Self::HEADER,
128            names: self.names(),
129        };
130        let meta_bytes = postcard::to_stdvec(&meta).unwrap();
131        let meta_bytes_hash = blake3::hash(&meta_bytes).into();
132        let links = std::iter::once(meta_bytes_hash)
133            .chain(self.links())
134            .collect::<HashSeq>();
135        let links_bytes = links.into_inner();
136        [meta_bytes.into(), links_bytes].into_iter()
137    }
138
139    /// Read the collection from a get fsm.
140    ///
141    /// Returns the fsm at the start of the first child blob (if any),
142    /// the links array, and the collection.
143    pub async fn read_fsm(
144        fsm_at_start_root: fsm::AtStartRoot,
145    ) -> Result<(fsm::EndBlobNext, HashSeq, Collection)> {
146        let (next, links) = {
147            let curr = fsm_at_start_root.next();
148            let (curr, data) = curr.concatenate_into_vec().await?;
149            let links = HashSeq::new(data.into())
150                .ok_or_else(|| n0_error::anyerr!("links could not be parsed"))?;
151            (curr.next(), links)
152        };
153        let fsm::EndBlobNext::MoreChildren(at_meta) = next else {
154            n0_error::bail_any!("expected meta");
155        };
156        let (next, collection) = {
157            let mut children = links.clone();
158            let meta_link = children
159                .pop_front()
160                .ok_or_else(|| n0_error::anyerr!("meta link not found"))?;
161            let curr = at_meta.next(meta_link);
162            let (curr, names) = curr.concatenate_into_vec().await?;
163            let names = postcard::from_bytes::<CollectionMeta>(&names).anyerr()?;
164            n0_error::ensure_any!(
165                names.check_header(),
166                "expected header {:?}, got {:?}",
167                Self::HEADER,
168                names.header
169            );
170            let collection = Collection::from_parts(children, names);
171            (curr.next(), collection)
172        };
173        Ok((next, links, collection))
174    }
175
176    /// Read the collection and all it's children from a get fsm.
177    ///
178    /// Returns the collection, a map from blob offsets to bytes, and the stats.
179    pub async fn read_fsm_all(
180        fsm_at_start_root: crate::get::fsm::AtStartRoot,
181    ) -> Result<(Collection, BTreeMap<u64, Bytes>, Stats)> {
182        let (next, links, collection) = Self::read_fsm(fsm_at_start_root).await?;
183        let mut res = BTreeMap::new();
184        let mut curr = next;
185        let end = loop {
186            match curr {
187                fsm::EndBlobNext::MoreChildren(more) => {
188                    let child_offset = more.offset() - 1;
189                    let Some(hash) = links.get(usize::try_from(child_offset).anyerr()?) else {
190                        break more.finish();
191                    };
192                    let header = more.next(hash);
193                    let (next, blob) = header.concatenate_into_vec().await?;
194                    res.insert(child_offset - 1, blob.into());
195                    curr = next.next();
196                }
197                fsm::EndBlobNext::Closing(closing) => break closing,
198            }
199        };
200        let stats = end.next().await?;
201        Ok((collection, res, stats))
202    }
203
204    /// Create a new collection from a hash sequence and metadata.
205    pub async fn load(root: Hash, store: &impl SimpleStore) -> Result<Self> {
206        let hs = store.load(root).await?;
207        let hs = HashSeq::try_from(hs)?;
208        let meta_hash = hs
209            .iter()
210            .next()
211            .ok_or_else(|| n0_error::anyerr!("empty hash seq"))?;
212        let meta = store.load(meta_hash).await?;
213        let meta: CollectionMeta = postcard::from_bytes(&meta).anyerr()?;
214        n0_error::ensure_any!(
215            meta.names.len() + 1 == hs.len(),
216            "names and links length mismatch"
217        );
218        Ok(Self::from_parts(hs.into_iter().skip(1), meta))
219    }
220
221    /// Store a collection in a store. returns the root hash of the collection
222    /// as a TempTag.
223    pub async fn store(self, db: &Store) -> Result<TempTag> {
224        let (links, meta) = self.into_parts();
225        let meta_bytes = postcard::to_stdvec(&meta).anyerr()?;
226        let meta_tag = db.add_bytes(meta_bytes).temp_tag().await?;
227        let links_bytes = std::iter::once(meta_tag.hash())
228            .chain(links)
229            .collect::<HashSeq>();
230        let links_tag = db
231            .add_bytes_with_opts(AddBytesOptions {
232                data: links_bytes.into(),
233                format: BlobFormat::HashSeq,
234            })
235            .temp_tag()
236            .await?;
237        Ok(links_tag)
238    }
239
240    /// Split a collection into a sequence of links and metadata
241    fn into_parts(self) -> (Vec<Hash>, CollectionMeta) {
242        let mut names = Vec::with_capacity(self.blobs.len());
243        let mut links = Vec::with_capacity(self.blobs.len());
244        for (name, hash) in self.blobs {
245            names.push(name);
246            links.push(hash);
247        }
248        let meta = CollectionMeta {
249            header: *Self::HEADER,
250            names,
251        };
252        (links, meta)
253    }
254
255    /// Create a new collection from a list of hashes and metadata
256    fn from_parts(links: impl IntoIterator<Item = Hash>, meta: CollectionMeta) -> Self {
257        meta.names.into_iter().zip(links).collect()
258    }
259
260    /// Get the links to the blobs in this collection
261    fn links(&self) -> impl Iterator<Item = Hash> + '_ {
262        self.blobs.iter().map(|(_name, hash)| *hash)
263    }
264
265    /// Get the names of the blobs in this collection
266    fn names(&self) -> Vec<String> {
267        self.blobs.iter().map(|(name, _)| name.clone()).collect()
268    }
269
270    /// Iterate over the blobs in this collection
271    pub fn iter(&self) -> impl Iterator<Item = &(String, Hash)> {
272        self.blobs.iter()
273    }
274
275    /// Get the number of blobs in this collection
276    pub fn len(&self) -> usize {
277        self.blobs.len()
278    }
279
280    /// Check if this collection is empty
281    pub fn is_empty(&self) -> bool {
282        self.blobs.is_empty()
283    }
284
285    /// Add the given blob to the collection.
286    pub fn push(&mut self, name: String, hash: Hash) {
287        self.blobs.push((name, hash));
288    }
289}
290
#[cfg(test)]
mod tests {
    use n0_error::{Result, StackResultExt};

    use super::*;

    /// In-memory [SimpleStore] for testing: a map from hash to blob content
    struct TestStore(BTreeMap<Hash, Bytes>);

    impl FromIterator<(Hash, Bytes)> for TestStore {
        fn from_iter<T: IntoIterator<Item = (Hash, Bytes)>>(iter: T) -> Self {
            let blobs: BTreeMap<Hash, Bytes> = iter.into_iter().collect();
            Self(blobs)
        }
    }

    impl SimpleStore for TestStore {
        async fn load(&self, hash: Hash) -> Result<Bytes> {
            let Self(blobs) = self;
            blobs.get(&hash).cloned().context("not found")
        }
    }

    /// A single `(name, hash)` entry survives a postcard roundtrip.
    #[test]
    fn roundtrip_blob() {
        let hash: Hash = blake3::Hash::from_hex(
            "3aa61c409fd7717c9d9c639202af2fae470c0ef669be7ba2caea5779cb534e9d",
        )
        .unwrap()
        .into();
        let original = ("test".to_string(), hash);

        let mut scratch = bytes::BytesMut::zeroed(1024);
        postcard::to_slice(&original, &mut scratch).unwrap();
        let decoded: (String, Hash) = postcard::from_bytes(&scratch).unwrap();
        assert_eq!(original, decoded);
    }

    /// The metadata wire format survives a postcard roundtrip.
    #[test]
    fn roundtrip_collection_meta() {
        let names = vec!["test".to_string(), "a".to_string(), "b".to_string()];
        let expected = CollectionMeta {
            header: *Collection::HEADER,
            names,
        };

        let mut scratch = bytes::BytesMut::zeroed(1024);
        postcard::to_slice(&expected, &mut scratch).unwrap();
        let actual: CollectionMeta = postcard::from_bytes(&scratch).unwrap();
        assert_eq!(expected, actual);
    }

    /// Blobs produced by `to_blobs` can be loaded back into an equal collection.
    #[tokio::test]
    async fn collection_store_load() -> testresult::TestResult {
        let collection: Collection = (0..3)
            .map(|i| (format!("blob{i}"), Hash::from(blake3::hash(&[i as u8]))))
            .collect();

        // `to_blobs` yields the root blob last, so the last hash is the root.
        let blobs: Vec<(Hash, Bytes)> = collection
            .to_blobs()
            .map(|data| (Hash::from(blake3::hash(&data)), data))
            .collect();
        let root = blobs.last().map(|(hash, _)| *hash).unwrap();
        let store: TestStore = blobs.into_iter().collect();

        let loaded = Collection::load(root, &store).await?;
        assert_eq!(collection, loaded);
        Ok(())
    }
}