From 14b6dd30a76f69f41538b2804bd6e16e0f664097 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Tue, 22 Apr 2025 08:46:24 +0200 Subject: [PATCH 1/6] perf(core): transform the inlined variant of NodeHash to a constant sized array --- CHANGELOG.md | 4 ++ crates/common/trie/node.rs | 4 +- crates/common/trie/node/branch.rs | 25 +++++------ crates/common/trie/node/extension.rs | 17 +++----- crates/common/trie/node/leaf.rs | 4 +- crates/common/trie/node_hash.rs | 65 ++++++++++++++++++---------- crates/common/trie/state.rs | 4 +- crates/common/trie/trie.rs | 22 +++++----- crates/common/trie/trie_iter.rs | 7 ++- crates/common/trie/verify_range.rs | 33 ++++++-------- crates/networking/p2p/sync.rs | 4 +- 11 files changed, 99 insertions(+), 90 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6aeb90b626..8588767dcd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Perf +### 2025-04-25 + +- Transform the inlined variant of NodeHash to a constant sized array [2516](https://github.com/lambdaclass/ethrex/pull/2516) + ### 2025-04-11 - Removed some unnecessary clones and made some functions const: [2438](https://github.com/lambdaclass/ethrex/pull/2438) diff --git a/crates/common/trie/node.rs b/crates/common/trie/node.rs index d6bf0b4a5a..d3429375cb 100644 --- a/crates/common/trie/node.rs +++ b/crates/common/trie/node.rs @@ -179,7 +179,7 @@ impl Node { fn decode_child(rlp: &[u8]) -> NodeHash { match decode_bytes(rlp) { Ok((hash, &[])) if hash.len() == 32 => NodeHash::Hashed(H256::from_slice(hash)), - Ok((&[], &[])) => NodeHash::Inline(vec![]), - _ => NodeHash::Inline(rlp.to_vec()), + Ok((&[], &[])) => NodeHash::default(), + _ => NodeHash::inline_from_slice(rlp), } } diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 21b86a6a92..9760b0c9bf 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -61,7 +61,7 @@ impl BranchNode { let child_hash = &self.choices[choice]; if child_hash.is_valid() { let child_node = state - .get_node(child_hash.clone())? + .get_node(*child_hash)? .ok_or(TrieError::InconsistentTree)?; child_node.get(state, path) } else { @@ -93,7 +93,7 @@ impl BranchNode { // Insert into existing child and then update it choice_hash => { let child_node = state - .get_node(choice_hash.clone())? + .get_node(*choice_hash)? .ok_or(TrieError::InconsistentTree)?; let child_node = child_node.insert(state, path, value)?; @@ -138,7 +138,7 @@ impl BranchNode { let value = if let Some(choice_index) = path.next_choice() { if self.choices[choice_index].is_valid() { let child_node = state - .get_node(self.choices[choice_index].clone())? + .get_node(self.choices[choice_index])? .ok_or(TrieError::InconsistentTree)?; // Remove value from child node let (child_node, old_value) = child_node.remove(state, path.clone())?; @@ -179,15 +179,14 @@ impl BranchNode { (1, false) => { let (choice_index, child_hash) = children[0]; let child = state - .get_node(child_hash.clone())? + .get_node(*child_hash)? .ok_or(TrieError::InconsistentTree)?; match child { // Replace self with an extension node leading to the child - Node::Branch(_) => ExtensionNode::new( - Nibbles::from_hex(vec![choice_index as u8]), - child_hash.clone(), - ) - .into(), + Node::Branch(_) => { + ExtensionNode::new(Nibbles::from_hex(vec![choice_index as u8]), *child_hash) + .into() + } // Replace self with the child extension node, updating its path in the process Node::Extension(mut extension_node) => { extension_node.prefix.prepend(choice_index as u8); @@ -207,7 +206,7 @@ impl BranchNode { /// Computes the node's hash pub fn compute_hash(&self) -> NodeHash { - NodeHash::from_encoded_raw(self.encode_raw()) + NodeHash::from_encoded_raw(&self.encode_raw()) } /// Encodes the node @@ -217,7 +216,7 @@ impl BranchNode { for child in self.choices.iter() { match child { NodeHash::Hashed(hash) => encoder = encoder.encode_bytes(&hash.0), - NodeHash::Inline(raw) if !raw.is_empty() => encoder = encoder.encode_raw(raw), + NodeHash::Inline(raw) if raw.1 != 0 => encoder = encoder.encode_raw(child.as_ref()), _ => encoder = encoder.encode_bytes(&[]), } } @@ -229,7 +228,7 @@ impl BranchNode { /// Inserts the node into the state and returns its hash pub fn insert_self(self, state: &mut TrieState) -> Result { let hash = self.compute_hash(); - state.insert_node(self.into(), hash.clone()); + state.insert_node(self.into(), hash); Ok(hash) } @@ -253,7 +252,7 @@ impl BranchNode { let child_hash = &self.choices[choice]; if child_hash.is_valid() { let child_node = state - .get_node(child_hash.clone())? + .get_node(*child_hash)? .ok_or(TrieError::InconsistentTree)?; child_node.get_path(state, path, node_path)?; } diff --git a/crates/common/trie/node/extension.rs b/crates/common/trie/node/extension.rs index 366b4898bd..24833fb32c 100644 --- a/crates/common/trie/node/extension.rs +++ b/crates/common/trie/node/extension.rs @@ -28,7 +28,7 @@ impl ExtensionNode { // Otherwise, no value is present. if path.skip_prefix(&self.prefix) { let child_node = state - .get_node(self.child.clone())? + .get_node(self.child)? .ok_or(TrieError::InconsistentTree)?; child_node.get(state, path) @@ -144,21 +144,14 @@ impl ExtensionNode { /// Computes the node's hash pub fn compute_hash(&self) -> NodeHash { - NodeHash::from_encoded_raw(self.encode_raw()) + NodeHash::from_encoded_raw(&self.encode_raw()) } /// Encodes the node pub fn encode_raw(&self) -> Vec { let mut buf = vec![]; let mut encoder = Encoder::new(&mut buf).encode_bytes(&self.prefix.encode_compact()); - match &self.child { - NodeHash::Inline(x) => { - encoder = encoder.encode_raw(x); - } - NodeHash::Hashed(x) => { - encoder = encoder.encode_bytes(&x.0); - } - } + encoder = self.child.encode(encoder); encoder.finish(); buf } @@ -166,7 +159,7 @@ impl ExtensionNode { /// Inserts the node into the state and returns its hash pub fn insert_self(self, state: &mut TrieState) -> Result { let hash = self.compute_hash(); - state.insert_node(self.into(), hash.clone()); + state.insert_node(self.into(), hash); Ok(hash) } @@ -187,7 +180,7 @@ impl ExtensionNode { // Continue to child if path.skip_prefix(&self.prefix) { let child_node = state - .get_node(self.child.clone())? + .get_node(self.child)? .ok_or(TrieError::InconsistentTree)?; child_node.get_path(state, path, node_path)?; } diff --git a/crates/common/trie/node/leaf.rs b/crates/common/trie/node/leaf.rs index c8b9332203..bcac4c4222 100644 --- a/crates/common/trie/node/leaf.rs +++ b/crates/common/trie/node/leaf.rs @@ -101,7 +101,7 @@ impl LeafNode { /// Computes the node's hash pub fn compute_hash(&self) -> NodeHash { - NodeHash::from_encoded_raw(self.encode_raw()) + NodeHash::from_encoded_raw(&self.encode_raw()) } /// Encodes the node @@ -118,7 +118,7 @@ impl LeafNode { /// Receives the offset that needs to be traversed to reach the leaf node from the canonical root, used to compute the node hash pub fn insert_self(self, state: &mut TrieState) -> Result { let hash = self.compute_hash(); - state.insert_node(self.into(), hash.clone()); + state.insert_node(self.into(), hash); Ok(hash) } diff --git a/crates/common/trie/node_hash.rs b/crates/common/trie/node_hash.rs index e4a6397edf..978f17eda2 100644 --- a/crates/common/trie/node_hash.rs +++ b/crates/common/trie/node_hash.rs @@ -1,5 +1,5 @@ use ethereum_types::H256; -use ethrex_rlp::{decode::RLPDecode, encode::RLPEncode}; +use ethrex_rlp::{decode::RLPDecode, encode::RLPEncode, structs::Encoder}; #[cfg(feature = "libmdbx")] use libmdbx::orm::{Decodable, Encodable}; use sha3::{Digest, Keccak256}; @@ -8,16 +8,17 @@ use sha3::{Digest, Keccak256}; /// If the encoded node is less than 32 bits, contains the encoded node itself // TODO: Check if we can omit the Inline variant, as nodes will always be bigger than 32 bits in our use case // TODO: Check if making this `Copy` can make the code less verbose at a reasonable performance cost -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum NodeHash { Hashed(H256), - Inline(Vec), + // Inline is always len < 32. We need to store the length of the data, a u8 is enough. + Inline(([u8; 31], u8)), } impl AsRef<[u8]> for NodeHash { fn as_ref(&self) -> &[u8] { match self { - NodeHash::Inline(x) => x.as_ref(), + NodeHash::Inline((slice, len)) => &slice[0..(*len as usize)], NodeHash::Hashed(x) => x.as_bytes(), } } @@ -25,21 +26,32 @@ impl AsRef<[u8]> for NodeHash { impl NodeHash { /// Returns the `NodeHash` of an encoded node (encoded using the NodeEncoder) - pub fn from_encoded_raw(encoded: Vec) -> NodeHash { + pub fn from_encoded_raw(encoded: &[u8]) -> NodeHash { if encoded.len() >= 32 { - let hash = Keccak256::new_with_prefix(&encoded).finalize(); + let hash = Keccak256::new_with_prefix(encoded).finalize(); NodeHash::Hashed(H256::from_slice(hash.as_slice())) } else { - NodeHash::Inline(encoded) + NodeHash::inline_from_slice(encoded) } } + + pub(crate) fn inline_from_slice(slice: &[u8]) -> NodeHash { + assert!(slice.len() < 32, "Slice must have a len < 32 to be inlined"); + let mut buffer = [0; 31]; + buffer[0..slice.len()].copy_from_slice(slice); + NodeHash::Inline((buffer, slice.len() as u8)) + } + /// Returns the finalized hash /// NOTE: This will hash smaller nodes, only use to get the final root hash, not for intermediate node hashes pub fn finalize(self) -> H256 { match self { - NodeHash::Inline(x) => { - H256::from_slice(Keccak256::new().chain_update(&*x).finalize().as_slice()) - } + NodeHash::Inline(_) => H256::from_slice( + Keccak256::new() + .chain_update(self.as_ref()) + .finalize() + .as_slice(), + ), NodeHash::Hashed(x) => x, } } @@ -48,12 +60,25 @@ impl NodeHash { /// The hash will only be considered invalid if it is empty /// Aka if it has a default value instead of being a product of hash computation pub fn is_valid(&self) -> bool { - !matches!(self, NodeHash::Inline(v) if v.is_empty()) + !matches!(self, NodeHash::Inline(v) if v.1 == 0) } /// Const version of `Default` trait impl pub const fn const_default() -> Self { - Self::Inline(vec![]) + Self::Inline(([0; 31], 0)) + } + + /// Encodes this NodeHash with the given encoder. + pub fn encode<'a>(&self, mut encoder: Encoder<'a>) -> Encoder<'a> { + match self { + NodeHash::Inline(_) => { + encoder = encoder.encode_raw(self.as_ref()); + } + NodeHash::Hashed(_) => { + encoder = encoder.encode_bytes(self.as_ref()); + } + } + encoder } } @@ -61,7 +86,7 @@ impl From> for NodeHash { fn from(value: Vec) -> Self { match value.len() { 32 => NodeHash::Hashed(H256::from_slice(&value)), - _ => NodeHash::Inline(value), + _ => NodeHash::inline_from_slice(&value), } } } @@ -74,19 +99,13 @@ impl From for NodeHash { impl From for Vec { fn from(val: NodeHash) -> Self { - match val { - NodeHash::Hashed(x) => x.0.to_vec(), - NodeHash::Inline(x) => x, - } + val.as_ref().to_vec() } } impl From<&NodeHash> for Vec { fn from(val: &NodeHash) -> Self { - match val { - NodeHash::Hashed(x) => x.0.to_vec(), - NodeHash::Inline(x) => x.clone(), - } + val.as_ref().to_vec() } } @@ -104,14 +123,14 @@ impl Decodable for NodeHash { fn decode(b: &[u8]) -> anyhow::Result { Ok(match b.len() { 32 => NodeHash::Hashed(H256::from_slice(b)), - _ => NodeHash::Inline(b.into()), + _ => NodeHash::inline_from_slice(b), }) } } impl Default for NodeHash { fn default() -> Self { - NodeHash::Inline(Vec::new()) + NodeHash::Inline(([0; 31], 0)) } } diff --git a/crates/common/trie/state.rs b/crates/common/trie/state.rs index 124b240071..893e882dc5 100644 --- a/crates/common/trie/state.rs +++ b/crates/common/trie/state.rs @@ -26,8 +26,8 @@ impl TrieState { /// Retrieves a node based on its hash pub fn get_node(&self, hash: NodeHash) -> Result, TrieError> { // Decode the node if it is inlined - if let NodeHash::Inline(encoded) = hash { - return Ok(Some(Node::decode_raw(&encoded)?)); + if let NodeHash::Inline(_) = hash { + return Ok(Some(Node::decode_raw(hash.as_ref())?)); } if let Some(node) = self.cache.get(&hash) { return Ok(Some(node.clone())); diff --git a/crates/common/trie/trie.rs b/crates/common/trie/trie.rs index dbe8e4e83f..9e1ebd8f82 100644 --- a/crates/common/trie/trie.rs +++ b/crates/common/trie/trie.rs @@ -73,7 +73,7 @@ impl Trie { if let Some(root) = &self.root { let root_node = self .state - .get_node(root.clone())? + .get_node(*root)? .ok_or(TrieError::InconsistentTree)?; root_node.get(&self.state, Nibbles::from_bytes(path)) } else { @@ -128,7 +128,7 @@ impl Trie { Ok(self .root .as_ref() - .map(|root| root.clone().finalize()) + .map(|root| root.finalize()) .unwrap_or(*EMPTY_TRIE_HASH)) } @@ -137,7 +137,7 @@ impl Trie { pub fn hash_no_commit(&self) -> H256 { self.root .as_ref() - .map(|root| root.clone().finalize()) + .map(|root| root.finalize()) .unwrap_or(*EMPTY_TRIE_HASH) } @@ -158,10 +158,10 @@ impl Trie { return Ok(node_path); }; // If the root is inlined, add it to the node_path - if let NodeHash::Inline(node) = root { - node_path.push(node.to_vec()); + if let NodeHash::Inline(_) = root { + node_path.push(root.as_ref().to_vec()); } - if let Some(root_node) = self.state.get_node(root.clone())? { + if let Some(root_node) = self.state.get_node(*root)? { root_node.get_path(&self.state, Nibbles::from_bytes(path), &mut node_path)?; } Ok(node_path) @@ -177,7 +177,7 @@ impl Trie { let Some(root_node) = self .root .as_ref() - .map(|root| self.state.get_node(root.clone())) + .map(|root| self.state.get_node(*root)) .transpose()? .flatten() else { @@ -230,7 +230,7 @@ impl Trie { } trie.root .as_ref() - .map(|root| root.clone().finalize()) + .map(|root| root.finalize()) .unwrap_or(*EMPTY_TRIE_HASH) } @@ -274,7 +274,7 @@ impl Trie { let Some(root_node) = self .root .as_ref() - .map(|root| self.state.get_node(root.clone())) + .map(|root| self.state.get_node(*root)) .transpose()? .flatten() else { @@ -295,7 +295,7 @@ impl Trie { if child_hash.is_valid() { let child_node = self .state - .get_node(child_hash.clone())? + .get_node(*child_hash)? .ok_or(TrieError::InconsistentTree)?; self.get_node_inner(child_node, partial_path) } else { @@ -310,7 +310,7 @@ impl Trie { { let child_node = self .state - .get_node(extension_node.child.clone())? + .get_node(extension_node.child)? .ok_or(TrieError::InconsistentTree)?; self.get_node_inner(child_node, partial_path) } else { diff --git a/crates/common/trie/trie_iter.rs b/crates/common/trie/trie_iter.rs index 5b90ddd13c..5526b62b2b 100644 --- a/crates/common/trie/trie_iter.rs +++ b/crates/common/trie/trie_iter.rs @@ -9,7 +9,7 @@ pub struct TrieIterator { impl TrieIterator { pub(crate) fn new(trie: Trie) -> Self { let stack = if let Some(root) = &trie.root { - vec![(Nibbles::default(), root.clone())] + vec![(Nibbles::default(), *root)] } else { vec![] }; @@ -34,7 +34,7 @@ impl Iterator for TrieIterator { if child.is_valid() { let mut child_path = path.clone(); child_path.append(choice as u8); - self.stack.push((child_path, child.clone())) + self.stack.push((child_path, *child)) } } } @@ -42,8 +42,7 @@ impl Iterator for TrieIterator { // Update path path.extend(&extension_node.prefix); // Add child to the stack - self.stack - .push((path.clone(), extension_node.child.clone())); + self.stack.push((path.clone(), extension_node.child)); } Node::Leaf(leaf) => { path.extend(&leaf.partial); diff --git a/crates/common/trie/verify_range.rs b/crates/common/trie/verify_range.rs index 51ae1978a5..358bdf422e 100644 --- a/crates/common/trie/verify_range.rs +++ b/crates/common/trie/verify_range.rs @@ -153,7 +153,7 @@ fn fill_node( let node = proof_nodes.get_node(node_hash)?; let child_hash = get_child(path, &node); if let Some(ref child_hash) = child_hash { - trie_state.insert_node(node, node_hash.clone()); + trie_state.insert_node(node, *node_hash); fill_node(path, child_hash, trie_state, proof_nodes) } else { let value = match &node { @@ -163,7 +163,7 @@ fn fill_node( .then_some(n.value.clone()) .unwrap_or_default(), }; - trie_state.insert_node(node, node_hash.clone()); + trie_state.insert_node(node, *node_hash); Ok(value) } } @@ -174,12 +174,12 @@ fn get_child<'a>(path: &'a mut Nibbles, node: &'a Node) -> Option { Node::Branch(n) => { if let Some(choice) = path.next_choice() { if n.choices[choice].is_valid() { - return Some(n.choices[choice].clone()); + return Some(n.choices[choice]); } } None } - Node::Extension(n) => path.skip_prefix(&n.prefix).then_some(n.child.clone()), + Node::Extension(n) => path.skip_prefix(&n.prefix).then_some(n.child), Node::Leaf(_) => None, } } @@ -212,7 +212,7 @@ fn has_right_element_inner( if n.choices[choice + 1..].iter().any(|child| child.is_valid()) { return Ok(true); } else if n.choices[choice].is_valid() { - return has_right_element_inner(n.choices[choice].clone(), path, trie_state); + return has_right_element_inner(n.choices[choice], path, trie_state); } } } @@ -261,7 +261,7 @@ fn remove_internal_references_inner( return Ok(true); } // We already looked up the nodes when filling the state so this shouldn't fail - let node = trie_state.get_node(node_hash.clone())?.unwrap(); + let node = trie_state.get_node(node_hash)?.unwrap(); match node { Node::Branch(mut n) => { // If none of the paths have a next choice nibble then it means that this is the end of the path @@ -276,7 +276,7 @@ fn remove_internal_references_inner( // Keep going // Check if the child extension node should be removed as a result of this process let should_remove = remove_internal_references_inner( - n.choices[left_choice].clone(), + n.choices[left_choice], left_path, right_path, trie_state, @@ -295,13 +295,9 @@ fn remove_internal_references_inner( } // Remove nodes on the left and right choice's subtries let should_remove_left = - remove_node(n.choices[left_choice].clone(), left_path, false, trie_state); - let should_remove_right = remove_node( - n.choices[right_choice].clone(), - right_path, - true, - trie_state, - ); + remove_node(n.choices[left_choice], left_path, false, trie_state); + let should_remove_right = + remove_node(n.choices[right_choice], right_path, true, trie_state); // Remove left and right child nodes if their subtries where wiped in the process if should_remove_left { n.choices[left_choice] = NodeHash::default(); @@ -373,7 +369,7 @@ fn remove_node( return false; } // We already looked up the nodes when filling the state so this shouldn't fail - let node = trie_state.get_node(node_hash.clone()).unwrap().unwrap(); + let node = trie_state.get_node(node_hash).unwrap().unwrap(); match node { Node::Branch(mut n) => { // Remove child nodes @@ -391,8 +387,7 @@ fn remove_node( } } // Remove nodes to the left/right of the choice's subtrie - let should_remove = - remove_node(n.choices[choice].clone(), path, remove_left, trie_state); + let should_remove = remove_node(n.choices[choice], path, remove_left, trie_state); if should_remove { n.choices[choice] = NodeHash::default(); } @@ -440,10 +435,10 @@ impl<'a> ProofNodeStorage<'a> { let Some(encoded) = self.nodes.get(hash.as_bytes()) else { return Err(TrieError::Verify(format!("proof node missing: {hash}"))); }; - *encoded + encoded.as_slice() } - NodeHash::Inline(ref encoded) => encoded, + NodeHash::Inline(_) => hash.as_ref(), }; Ok(Node::decode_raw(encoded)?) } diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 4df3217064..d3ae9a2798 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -677,13 +677,13 @@ fn node_missing_children( match &node { Node::Branch(node) => { for (index, child) in node.choices.iter().enumerate() { - if child.is_valid() && trie_state.get_node(child.clone())?.is_none() { + if child.is_valid() && trie_state.get_node(*child)?.is_none() { paths.push(parent_path.append_new(index as u8)); } } } Node::Extension(node) => { - if node.child.is_valid() && trie_state.get_node(node.child.clone())?.is_none() { + if node.child.is_valid() && trie_state.get_node(node.child)?.is_none() { paths.push(parent_path.concat(node.prefix.clone())); } } From db4a93a67d0cb9e5bab0e856455e6c466756cb51 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Tue, 22 Apr 2025 10:20:20 +0200 Subject: [PATCH 2/6] perf(core): make TrieDb use NodeHash as key instead of Vec --- crates/common/trie/db.rs | 18 +++++------ crates/common/trie/node_hash.rs | 14 +++++++++ crates/common/trie/state.rs | 10 +++--- crates/common/trie/trie.rs | 11 +++---- crates/storage/store_db/in_memory.rs | 4 +-- crates/storage/store_db/libmdbx.rs | 4 +-- crates/storage/trie_db/libmdbx.rs | 37 ++++++++++++++--------- crates/storage/trie_db/libmdbx_dupsort.rs | 24 ++++++++------- crates/storage/trie_db/test_utils.rs | 3 +- crates/storage/trie_db/utils.rs | 17 +++++------ 10 files changed, 83 insertions(+), 59 deletions(-) diff --git a/crates/common/trie/db.rs b/crates/common/trie/db.rs index 8f58aa7566..498ab1b9d4 100644 --- a/crates/common/trie/db.rs +++ b/crates/common/trie/db.rs @@ -1,23 +1,23 @@ -use crate::error::TrieError; +use crate::{error::TrieError, NodeHash}; use std::{ collections::HashMap, sync::{Arc, Mutex}, }; pub trait TrieDB: Send + Sync { - fn get(&self, key: Vec) -> Result>, TrieError>; - fn put(&self, key: Vec, value: Vec) -> Result<(), TrieError>; + fn get(&self, key: NodeHash) -> Result>, TrieError>; + fn put(&self, key: NodeHash, value: Vec) -> Result<(), TrieError>; // fn put_batch(&self, key: Vec, value: Vec) -> Result<(), TrieError>; - fn put_batch(&self, key_values: Vec<(Vec, Vec)>) -> Result<(), TrieError>; + fn put_batch(&self, key_values: Vec<(NodeHash, Vec)>) -> Result<(), TrieError>; } /// InMemory implementation for the TrieDB trait, with get and put operations. pub struct InMemoryTrieDB { - inner: Arc, Vec>>>, + inner: Arc>>>, } impl InMemoryTrieDB { - pub const fn new(map: Arc, Vec>>>) -> Self { + pub const fn new(map: Arc>>>) -> Self { Self { inner: map } } pub fn new_empty() -> Self { @@ -28,7 +28,7 @@ impl InMemoryTrieDB { } impl TrieDB for InMemoryTrieDB { - fn get(&self, key: Vec) -> Result>, TrieError> { + fn get(&self, key: NodeHash) -> Result>, TrieError> { Ok(self .inner .lock() @@ -37,7 +37,7 @@ impl TrieDB for InMemoryTrieDB { .cloned()) } - fn put(&self, key: Vec, value: Vec) -> Result<(), TrieError> { + fn put(&self, key: NodeHash, value: Vec) -> Result<(), TrieError> { self.inner .lock() .map_err(|_| TrieError::LockError)? @@ -45,7 +45,7 @@ impl TrieDB for InMemoryTrieDB { Ok(()) } - fn put_batch(&self, key_values: Vec<(Vec, Vec)>) -> Result<(), TrieError> { + fn put_batch(&self, key_values: Vec<(NodeHash, Vec)>) -> Result<(), TrieError> { let mut db = self.inner.lock().map_err(|_| TrieError::LockError)?; for (key, value) in key_values { diff --git a/crates/common/trie/node_hash.rs b/crates/common/trie/node_hash.rs index 978f17eda2..f2c3a6313c 100644 --- a/crates/common/trie/node_hash.rs +++ b/crates/common/trie/node_hash.rs @@ -35,6 +35,20 @@ impl NodeHash { } } + pub fn len(&self) -> usize { + match self { + NodeHash::Hashed(h256) => h256.as_bytes().len(), + NodeHash::Inline(value) => value.1 as usize, + } + } + + pub fn is_empty(&self) -> bool { + match self { + NodeHash::Hashed(h256) => h256.as_bytes().is_empty(), + NodeHash::Inline(value) => value.1 == 0, + } + } + pub(crate) fn inline_from_slice(slice: &[u8]) -> NodeHash { assert!(slice.len() < 32, "Slice must have a len < 32 to be inlined"); let mut buffer = [0; 31]; diff --git a/crates/common/trie/state.rs b/crates/common/trie/state.rs index 893e882dc5..be779d1fb8 100644 --- a/crates/common/trie/state.rs +++ b/crates/common/trie/state.rs @@ -33,7 +33,7 @@ impl TrieState { return Ok(Some(node.clone())); }; self.db - .get(hash.into())? + .get(hash)? .map(|rlp| Node::decode(&rlp).map_err(TrieError::RLPDecode)) .transpose() } @@ -68,7 +68,7 @@ impl TrieState { fn commit_node_tail_recursive( &mut self, node_hash: &NodeHash, - acc: &mut Vec<(Vec, Vec)>, + acc: &mut Vec<(NodeHash, Vec)>, ) -> Result<(), TrieError> { let Some(node) = self.cache.remove(node_hash) else { // If the node is not in the cache then it means it is already stored in the DB @@ -87,7 +87,7 @@ impl TrieState { Node::Leaf(_) => {} } // Commit self - acc.push((node_hash.into(), node.encode_to_vec())); + acc.push((*node_hash, node.encode_to_vec())); Ok(()) } @@ -96,7 +96,7 @@ impl TrieState { pub fn write_node(&mut self, node: Node, hash: NodeHash) -> Result<(), TrieError> { // Don't insert the node if it is already inlined on the parent if matches!(hash, NodeHash::Hashed(_)) { - self.db.put(hash.into(), node.encode_to_vec())?; + self.db.put(hash, node.encode_to_vec())?; } Ok(()) } @@ -108,7 +108,7 @@ impl TrieState { .iter() .filter_map(|node| { let hash = node.compute_hash(); - matches!(hash, NodeHash::Hashed(_)).then(|| (hash.into(), node.encode_to_vec())) + matches!(hash, NodeHash::Hashed(_)).then(|| (hash, node.encode_to_vec())) }) .collect(); self.db.put_batch(key_values)?; diff --git a/crates/common/trie/trie.rs b/crates/common/trie/trie.rs index 9e1ebd8f82..ec0017a81c 100644 --- a/crates/common/trie/trie.rs +++ b/crates/common/trie/trie.rs @@ -11,14 +11,13 @@ mod trie_iter; mod verify_range; use ethereum_types::H256; use ethrex_rlp::constants::RLP_NULL; -use node_hash::NodeHash; use sha3::{Digest, Keccak256}; use std::collections::HashSet; pub use self::db::{InMemoryTrieDB, TrieDB}; pub use self::nibbles::Nibbles; pub use self::verify_range::verify_range; -pub use self::{node::Node, state::TrieState}; +pub use self::{node::Node, node_hash::NodeHash, state::TrieState}; pub use self::error::TrieError; use self::{node::LeafNode, trie_iter::TrieIterator}; @@ -241,15 +240,15 @@ impl Trie { struct NullTrieDB; impl TrieDB for NullTrieDB { - fn get(&self, _key: Vec) -> Result>, TrieError> { + fn get(&self, _key: NodeHash) -> Result>, TrieError> { Ok(None) } - fn put(&self, _key: Vec, _value: Vec) -> Result<(), TrieError> { + fn put(&self, _key: NodeHash, _value: Vec) -> Result<(), TrieError> { Ok(()) } - fn put_batch(&self, _key_values: Vec<(Vec, Vec)>) -> Result<(), TrieError> { + fn put_batch(&self, _key_values: Vec<(NodeHash, Vec)>) -> Result<(), TrieError> { Ok(()) } } @@ -340,7 +339,7 @@ impl Trie { use std::sync::Arc; use std::sync::Mutex; - let hmap: HashMap, Vec> = HashMap::new(); + let hmap: HashMap> = HashMap::new(); let map = Arc::new(Mutex::new(hmap)); let db = InMemoryTrieDB::new(map); Trie::new(Box::new(db)) diff --git a/crates/storage/store_db/in_memory.rs b/crates/storage/store_db/in_memory.rs index cd0974c57f..4658bf2a45 100644 --- a/crates/storage/store_db/in_memory.rs +++ b/crates/storage/store_db/in_memory.rs @@ -9,14 +9,14 @@ use ethrex_common::types::{ payload::PayloadBundle, AccountState, Block, BlockBody, BlockHash, BlockHeader, BlockNumber, ChainConfig, Index, Receipt, }; -use ethrex_trie::{InMemoryTrieDB, Nibbles, Trie}; +use ethrex_trie::{InMemoryTrieDB, Nibbles, NodeHash, Trie}; use std::{ collections::{BTreeMap, HashMap}, fmt::Debug, sync::{Arc, Mutex, MutexGuard}, }; -pub type NodeMap = Arc, Vec>>>; +pub type NodeMap = Arc>>>; #[derive(Default, Clone)] pub struct Store(Arc>); diff --git a/crates/storage/store_db/libmdbx.rs b/crates/storage/store_db/libmdbx.rs index 4e5fb878b4..1fd9203a99 100644 --- a/crates/storage/store_db/libmdbx.rs +++ b/crates/storage/store_db/libmdbx.rs @@ -19,7 +19,7 @@ use ethrex_common::types::{ use ethrex_rlp::decode::RLPDecode; use ethrex_rlp::encode::RLPEncode; use ethrex_rlp::error::RLPDecodeError; -use ethrex_trie::{Nibbles, Trie}; +use ethrex_trie::{Nibbles, NodeHash, Trie}; use libmdbx::orm::{Decodable, DupSort, Encodable, Table}; use libmdbx::{ dupsort, @@ -1150,7 +1150,7 @@ table!( table!( /// state trie nodes - ( StateTrieNodes ) Vec => Vec + ( StateTrieNodes ) NodeHash => Vec ); // Local Blocks diff --git a/crates/storage/trie_db/libmdbx.rs b/crates/storage/trie_db/libmdbx.rs index 67c602f888..42bd5fcf1f 100644 --- a/crates/storage/trie_db/libmdbx.rs +++ b/crates/storage/trie_db/libmdbx.rs @@ -1,4 +1,4 @@ -use ethrex_trie::error::TrieError; +use ethrex_trie::{error::TrieError, NodeHash}; use libmdbx::orm::{Database, Table}; use std::{marker::PhantomData, sync::Arc}; /// Libmdbx implementation for the TrieDB trait, with get and put operations. @@ -11,7 +11,7 @@ use ethrex_trie::TrieDB; impl LibmdbxTrieDB where - T: Table, Value = Vec>, + T: Table>, { pub fn new(db: Arc) -> Self { Self { @@ -23,20 +23,20 @@ where impl TrieDB for LibmdbxTrieDB where - T: Table, Value = Vec>, + T: Table>, { - fn get(&self, key: Vec) -> Result>, TrieError> { + fn get(&self, key: NodeHash) -> Result>, TrieError> { let txn = self.db.begin_read().map_err(TrieError::DbError)?; txn.get::(key).map_err(TrieError::DbError) } - fn put(&self, key: Vec, value: Vec) -> Result<(), TrieError> { + fn put(&self, key: NodeHash, value: Vec) -> Result<(), TrieError> { let txn = self.db.begin_readwrite().map_err(TrieError::DbError)?; txn.upsert::(key, value).map_err(TrieError::DbError)?; txn.commit().map_err(TrieError::DbError) } - fn put_batch(&self, key_values: Vec<(Vec, Vec)>) -> Result<(), TrieError> { + fn put_batch(&self, key_values: Vec<(NodeHash, Vec)>) -> Result<(), TrieError> { let txn = self.db.begin_readwrite().map_err(TrieError::DbError)?; for (key, value) in key_values { txn.upsert::(key, value).map_err(TrieError::DbError)?; @@ -49,6 +49,7 @@ where mod test { use super::LibmdbxTrieDB; use crate::trie_db::test_utils::libmdbx::{new_db, TestNodes}; + use ethrex_trie::NodeHash; use ethrex_trie::Trie; use ethrex_trie::TrieDB; use libmdbx::{ @@ -58,28 +59,36 @@ mod test { use std::sync::Arc; use tempdir::TempDir; + pub const HELLO_NODEHASH: NodeHash = NodeHash::Inline(( + [ + 104, 101, 108, 108, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + ], + 2, + )); + #[test] fn simple_addition() { table!( /// NodeHash to Node table - ( Nodes ) Vec => Vec + ( Nodes ) NodeHash => Vec ); let inner_db = new_db::(); let db = LibmdbxTrieDB::::new(inner_db); - assert_eq!(db.get("hello".into()).unwrap(), None); - db.put("hello".into(), "value".into()).unwrap(); - assert_eq!(db.get("hello".into()).unwrap(), Some("value".into())); + assert_eq!(db.get(HELLO_NODEHASH).unwrap(), None); + db.put(HELLO_NODEHASH, "value".into()).unwrap(); + assert_eq!(db.get(HELLO_NODEHASH).unwrap(), Some("value".into())); } #[test] fn different_tables() { table!( /// vec to vec - ( TableA ) Vec => Vec + ( TableA ) NodeHash => Vec ); table!( /// vec to vec - ( TableB ) Vec => Vec + ( TableB ) NodeHash => Vec ); let tables = [table_info!(TableA), table_info!(TableB)] .into_iter() @@ -88,8 +97,8 @@ mod test { let inner_db = Arc::new(Database::create(None, &tables).unwrap()); let db_a = LibmdbxTrieDB::::new(inner_db.clone()); let db_b = LibmdbxTrieDB::::new(inner_db.clone()); - db_a.put("hello".into(), "value".into()).unwrap(); - assert_eq!(db_b.get("hello".into()).unwrap(), None); + db_a.put(HELLO_NODEHASH, "value".into()).unwrap(); + assert_eq!(db_b.get(HELLO_NODEHASH).unwrap(), None); } #[test] diff --git a/crates/storage/trie_db/libmdbx_dupsort.rs b/crates/storage/trie_db/libmdbx_dupsort.rs index 398c68fcf1..a420a01959 100644 --- a/crates/storage/trie_db/libmdbx_dupsort.rs +++ b/crates/storage/trie_db/libmdbx_dupsort.rs @@ -1,8 +1,8 @@ use std::{marker::PhantomData, sync::Arc}; use super::utils::node_hash_to_fixed_size; -use ethrex_trie::error::TrieError; use ethrex_trie::TrieDB; +use ethrex_trie::{error::TrieError, NodeHash}; use libmdbx::orm::{Database, DupSort, Encodable}; /// Libmdbx implementation for the TrieDB trait for a dupsort table with a fixed primary key. @@ -37,13 +37,13 @@ where T: DupSort>, SK: Clone + Encodable, { - fn get(&self, key: Vec) -> Result>, TrieError> { + fn get(&self, key: NodeHash) -> Result>, TrieError> { let txn = self.db.begin_read().map_err(TrieError::DbError)?; txn.get::((self.fixed_key.clone(), node_hash_to_fixed_size(key))) .map_err(TrieError::DbError) } - fn put(&self, key: Vec, value: Vec) -> Result<(), TrieError> { + fn put(&self, key: NodeHash, value: Vec) -> Result<(), TrieError> { let txn = self.db.begin_readwrite().map_err(TrieError::DbError)?; txn.upsert::( (self.fixed_key.clone(), node_hash_to_fixed_size(key)), @@ -53,7 +53,7 @@ where txn.commit().map_err(TrieError::DbError) } - fn put_batch(&self, key_values: Vec<(Vec, Vec)>) -> Result<(), TrieError> { + fn put_batch(&self, key_values: Vec<(NodeHash, Vec)>) -> Result<(), TrieError> { let txn = self.db.begin_readwrite().map_err(TrieError::DbError)?; for (key, value) in key_values { txn.upsert::( @@ -82,9 +82,10 @@ mod test { fn simple_addition() { let inner_db = new_db::(); let db = LibmdbxDupsortTrieDB::::new(inner_db, [5; 32]); - assert_eq!(db.get("hello".into()).unwrap(), None); - db.put("hello".into(), "value".into()).unwrap(); - assert_eq!(db.get("hello".into()).unwrap(), Some("value".into())); + let key = NodeHash::from_encoded_raw(b"hello"); + assert_eq!(db.get(key).unwrap(), None); + db.put(key, "value".into()).unwrap(); + assert_eq!(db.get(key).unwrap(), Some("value".into())); } #[test] @@ -92,9 +93,10 @@ mod test { let inner_db = new_db::(); let db_a = LibmdbxDupsortTrieDB::::new(inner_db.clone(), [5; 32]); let db_b = LibmdbxDupsortTrieDB::::new(inner_db, [7; 32]); - db_a.put("hello".into(), "hello!".into()).unwrap(); - db_b.put("hello".into(), "go away!".into()).unwrap(); - assert_eq!(db_a.get("hello".into()).unwrap(), Some("hello!".into())); - assert_eq!(db_b.get("hello".into()).unwrap(), Some("go away!".into())); + let key = NodeHash::from_encoded_raw(b"hello"); + db_a.put(key, "hello!".into()).unwrap(); + db_b.put(key, "go away!".into()).unwrap(); + assert_eq!(db_a.get(key).unwrap(), Some("hello!".into())); + assert_eq!(db_b.get(key).unwrap(), Some("go away!".into())); } } diff --git a/crates/storage/trie_db/test_utils.rs b/crates/storage/trie_db/test_utils.rs index 886a3b7edf..69b7376edf 100644 --- a/crates/storage/trie_db/test_utils.rs +++ b/crates/storage/trie_db/test_utils.rs @@ -2,6 +2,7 @@ pub mod libmdbx { use std::{path::PathBuf, sync::Arc}; + use ethrex_trie::NodeHash; use libmdbx::{ orm::{table_info, Database, Table}, table, @@ -9,7 +10,7 @@ pub mod libmdbx { table!( /// Test table. - (TestNodes) Vec => Vec + (TestNodes) NodeHash => Vec ); /// Creates a new DB on a given path diff --git a/crates/storage/trie_db/utils.rs b/crates/storage/trie_db/utils.rs index 10141a2e5a..cd4d456983 100644 --- a/crates/storage/trie_db/utils.rs +++ b/crates/storage/trie_db/utils.rs @@ -1,16 +1,15 @@ #[cfg(any(feature = "libmdbx", feature = "redb"))] // In order to use NodeHash as key in a dupsort table we must encode it into a fixed size type -pub fn node_hash_to_fixed_size(node_hash: Vec) -> [u8; 33] { - // keep original len so we can re-construct it later +pub fn node_hash_to_fixed_size(node_hash: ethrex_trie::NodeHash) -> [u8; 33] { + let node_hash_ref = node_hash.as_ref(); let original_len = node_hash.len(); // original len will always be lower or equal to 32 bytes debug_assert!(original_len <= 32); - // Pad the node_hash with zeros to make it fixed_size (in case of inline) - let mut node_hash = node_hash; - node_hash.resize(32, 0); + + let mut buffer = [0u8; 33]; + // Encode the node as [original_len, node_hash...] - std::array::from_fn(|i| match i { - 0 => original_len as u8, - n => node_hash[n - 1], - }) + buffer[0] = original_len as u8; + buffer[1..(original_len + 1)].copy_from_slice(node_hash_ref); + buffer } From 6e724b52f9806e31b785808790787d281c9c89eb Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Tue, 22 Apr 2025 08:46:24 +0200 Subject: [PATCH 3/6] perf(core): transform the inlined variant of NodeHash to a constant sized array --- CHANGELOG.md | 4 ++ crates/common/trie/node.rs | 7 ++- crates/common/trie/node/branch.rs | 25 +++++---- crates/common/trie/node/extension.rs | 17 ++---- crates/common/trie/node/leaf.rs | 4 +- crates/common/trie/node_hash.rs | 78 +++++++++++++++++----------- crates/common/trie/state.rs | 4 +- crates/common/trie/trie.rs | 22 ++++---- crates/common/trie/trie_iter.rs | 7 ++- crates/common/trie/verify_range.rs | 33 +++++------- crates/networking/p2p/sync.rs | 4 +- 11 files changed, 107 insertions(+), 98 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6aeb90b626..c4290c7f9b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Perf +### 2025-04-22 + +- Transform the inlined variant of NodeHash to a constant sized array [2516](https://github.com/lambdaclass/ethrex/pull/2516) + ### 2025-04-11 - Removed some unnecessary clones and made some functions const: [2438](https://github.com/lambdaclass/ethrex/pull/2438) diff --git a/crates/common/trie/node.rs b/crates/common/trie/node.rs index d6bf0b4a5a..7a31e25f87 100644 --- a/crates/common/trie/node.rs +++ b/crates/common/trie/node.rs @@ -5,7 +5,6 @@ mod leaf; use std::array; pub use branch::BranchNode; -use ethereum_types::H256; use ethrex_rlp::{decode::decode_bytes, error::RLPDecodeError, structs::Decoder}; pub use extension::ExtensionNode; pub use leaf::LeafNode; @@ -178,8 +177,8 @@ impl Node { fn decode_child(rlp: &[u8]) -> NodeHash { match decode_bytes(rlp) { - Ok((hash, &[])) if hash.len() == 32 => NodeHash::Hashed(H256::from_slice(hash)), - Ok((&[], &[])) => NodeHash::Inline(vec![]), - _ => NodeHash::Inline(rlp.to_vec()), + Ok((hash, &[])) if hash.len() == 32 => NodeHash::from_slice(hash), + Ok((&[], &[])) => NodeHash::default(), + _ => NodeHash::from_slice(rlp), } } diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 21b86a6a92..9760b0c9bf 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -61,7 +61,7 @@ impl BranchNode { let child_hash = &self.choices[choice]; if child_hash.is_valid() { let child_node = state - .get_node(child_hash.clone())? + .get_node(*child_hash)? .ok_or(TrieError::InconsistentTree)?; child_node.get(state, path) } else { @@ -93,7 +93,7 @@ impl BranchNode { // Insert into existing child and then update it choice_hash => { let child_node = state - .get_node(choice_hash.clone())? + .get_node(*choice_hash)? .ok_or(TrieError::InconsistentTree)?; let child_node = child_node.insert(state, path, value)?; @@ -138,7 +138,7 @@ impl BranchNode { let value = if let Some(choice_index) = path.next_choice() { if self.choices[choice_index].is_valid() { let child_node = state - .get_node(self.choices[choice_index].clone())? + .get_node(self.choices[choice_index])? .ok_or(TrieError::InconsistentTree)?; // Remove value from child node let (child_node, old_value) = child_node.remove(state, path.clone())?; @@ -179,15 +179,14 @@ impl BranchNode { (1, false) => { let (choice_index, child_hash) = children[0]; let child = state - .get_node(child_hash.clone())? + .get_node(*child_hash)? .ok_or(TrieError::InconsistentTree)?; match child { // Replace self with an extension node leading to the child - Node::Branch(_) => ExtensionNode::new( - Nibbles::from_hex(vec![choice_index as u8]), - child_hash.clone(), - ) - .into(), + Node::Branch(_) => { + ExtensionNode::new(Nibbles::from_hex(vec![choice_index as u8]), *child_hash) + .into() + } // Replace self with the child extension node, updating its path in the process Node::Extension(mut extension_node) => { extension_node.prefix.prepend(choice_index as u8); @@ -207,7 +206,7 @@ impl BranchNode { /// Computes the node's hash pub fn compute_hash(&self) -> NodeHash { - NodeHash::from_encoded_raw(self.encode_raw()) + NodeHash::from_encoded_raw(&self.encode_raw()) } /// Encodes the node @@ -217,7 +216,7 @@ impl BranchNode { for child in self.choices.iter() { match child { NodeHash::Hashed(hash) => encoder = encoder.encode_bytes(&hash.0), - NodeHash::Inline(raw) if !raw.is_empty() => encoder = encoder.encode_raw(raw), + NodeHash::Inline(raw) if raw.1 != 0 => encoder = encoder.encode_raw(child.as_ref()), _ => encoder = encoder.encode_bytes(&[]), } } @@ -229,7 +228,7 @@ impl BranchNode { /// Inserts the node into the state and returns its hash pub fn insert_self(self, state: &mut TrieState) -> Result { let hash = self.compute_hash(); - state.insert_node(self.into(), hash.clone()); + state.insert_node(self.into(), hash); Ok(hash) } @@ -253,7 +252,7 @@ impl BranchNode { let child_hash = &self.choices[choice]; if child_hash.is_valid() { let child_node = state - .get_node(child_hash.clone())? + .get_node(*child_hash)? .ok_or(TrieError::InconsistentTree)?; child_node.get_path(state, path, node_path)?; } diff --git a/crates/common/trie/node/extension.rs b/crates/common/trie/node/extension.rs index 366b4898bd..24833fb32c 100644 --- a/crates/common/trie/node/extension.rs +++ b/crates/common/trie/node/extension.rs @@ -28,7 +28,7 @@ impl ExtensionNode { // Otherwise, no value is present. if path.skip_prefix(&self.prefix) { let child_node = state - .get_node(self.child.clone())? + .get_node(self.child)? .ok_or(TrieError::InconsistentTree)?; child_node.get(state, path) @@ -144,21 +144,14 @@ impl ExtensionNode { /// Computes the node's hash pub fn compute_hash(&self) -> NodeHash { - NodeHash::from_encoded_raw(self.encode_raw()) + NodeHash::from_encoded_raw(&self.encode_raw()) } /// Encodes the node pub fn encode_raw(&self) -> Vec { let mut buf = vec![]; let mut encoder = Encoder::new(&mut buf).encode_bytes(&self.prefix.encode_compact()); - match &self.child { - NodeHash::Inline(x) => { - encoder = encoder.encode_raw(x); - } - NodeHash::Hashed(x) => { - encoder = encoder.encode_bytes(&x.0); - } - } + encoder = self.child.encode(encoder); encoder.finish(); buf } @@ -166,7 +159,7 @@ impl ExtensionNode { /// Inserts the node into the state and returns its hash pub fn insert_self(self, state: &mut TrieState) -> Result { let hash = self.compute_hash(); - state.insert_node(self.into(), hash.clone()); + state.insert_node(self.into(), hash); Ok(hash) } @@ -187,7 +180,7 @@ impl ExtensionNode { // Continue to child if path.skip_prefix(&self.prefix) { let child_node = state - .get_node(self.child.clone())? + .get_node(self.child)? .ok_or(TrieError::InconsistentTree)?; child_node.get_path(state, path, node_path)?; } diff --git a/crates/common/trie/node/leaf.rs b/crates/common/trie/node/leaf.rs index c8b9332203..bcac4c4222 100644 --- a/crates/common/trie/node/leaf.rs +++ b/crates/common/trie/node/leaf.rs @@ -101,7 +101,7 @@ impl LeafNode { /// Computes the node's hash pub fn compute_hash(&self) -> NodeHash { - NodeHash::from_encoded_raw(self.encode_raw()) + NodeHash::from_encoded_raw(&self.encode_raw()) } /// Encodes the node @@ -118,7 +118,7 @@ impl LeafNode { /// Receives the offset that needs to be traversed to reach the leaf node from the canonical root, used to compute the node hash pub fn insert_self(self, state: &mut TrieState) -> Result { let hash = self.compute_hash(); - state.insert_node(self.into(), hash.clone()); + state.insert_node(self.into(), hash); Ok(hash) } diff --git a/crates/common/trie/node_hash.rs b/crates/common/trie/node_hash.rs index e4a6397edf..8b712bb689 100644 --- a/crates/common/trie/node_hash.rs +++ b/crates/common/trie/node_hash.rs @@ -1,5 +1,5 @@ use ethereum_types::H256; -use ethrex_rlp::{decode::RLPDecode, encode::RLPEncode}; +use ethrex_rlp::{decode::RLPDecode, encode::RLPEncode, structs::Encoder}; #[cfg(feature = "libmdbx")] use libmdbx::orm::{Decodable, Encodable}; use sha3::{Digest, Keccak256}; @@ -8,16 +8,17 @@ use sha3::{Digest, Keccak256}; /// If the encoded node is less than 32 bits, contains the encoded node itself // TODO: Check if we can omit the Inline variant, as nodes will always be bigger than 32 bits in our use case // TODO: Check if making this `Copy` can make the code less verbose at a reasonable performance cost -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum NodeHash { Hashed(H256), - Inline(Vec), + // Inline is always len < 32. We need to store the length of the data, a u8 is enough. + Inline(([u8; 31], u8)), } impl AsRef<[u8]> for NodeHash { fn as_ref(&self) -> &[u8] { match self { - NodeHash::Inline(x) => x.as_ref(), + NodeHash::Inline((slice, len)) => &slice[0..(*len as usize)], NodeHash::Hashed(x) => x.as_bytes(), } } @@ -25,21 +26,39 @@ impl AsRef<[u8]> for NodeHash { impl NodeHash { /// Returns the `NodeHash` of an encoded node (encoded using the NodeEncoder) - pub fn from_encoded_raw(encoded: Vec) -> NodeHash { + pub fn from_encoded_raw(encoded: &[u8]) -> NodeHash { if encoded.len() >= 32 { - let hash = Keccak256::new_with_prefix(&encoded).finalize(); + let hash = Keccak256::new_with_prefix(encoded).finalize(); NodeHash::Hashed(H256::from_slice(hash.as_slice())) } else { - NodeHash::Inline(encoded) + NodeHash::from_slice(encoded) } } + + /// Converts a slice of an already hashed data (in case it's not inlineable) to a NodeHash. + /// + /// If you need to hash it in case its len >= 32 see `from_encoded_raw` + pub(crate) fn from_slice(slice: &[u8]) -> NodeHash { + match slice.len() { + 0..32 => { + let mut buffer = [0; 31]; + buffer[0..slice.len()].copy_from_slice(slice); + NodeHash::Inline((buffer, slice.len() as u8)) + } + _ => NodeHash::Hashed(H256::from_slice(slice)), + } + } + /// Returns the finalized hash /// NOTE: This will hash smaller nodes, only use to get the final root hash, not for intermediate node hashes pub fn finalize(self) -> H256 { match self { - NodeHash::Inline(x) => { - H256::from_slice(Keccak256::new().chain_update(&*x).finalize().as_slice()) - } + NodeHash::Inline(_) => H256::from_slice( + Keccak256::new() + .chain_update(self.as_ref()) + .finalize() + .as_slice(), + ), NodeHash::Hashed(x) => x, } } @@ -48,21 +67,31 @@ impl NodeHash { /// The hash will only be considered invalid if it is empty /// Aka if it has a default value instead of being a product of hash computation pub fn is_valid(&self) -> bool { - !matches!(self, NodeHash::Inline(v) if v.is_empty()) + !matches!(self, NodeHash::Inline(v) if v.1 == 0) } /// Const version of `Default` trait impl pub const fn const_default() -> Self { - Self::Inline(vec![]) + Self::Inline(([0; 31], 0)) + } + + /// Encodes this NodeHash with the given encoder. + pub fn encode<'a>(&self, mut encoder: Encoder<'a>) -> Encoder<'a> { + match self { + NodeHash::Inline(_) => { + encoder = encoder.encode_raw(self.as_ref()); + } + NodeHash::Hashed(_) => { + encoder = encoder.encode_bytes(self.as_ref()); + } + } + encoder } } impl From> for NodeHash { fn from(value: Vec) -> Self { - match value.len() { - 32 => NodeHash::Hashed(H256::from_slice(&value)), - _ => NodeHash::Inline(value), - } + NodeHash::from_slice(&value) } } @@ -74,19 +103,13 @@ impl From for NodeHash { impl From for Vec { fn from(val: NodeHash) -> Self { - match val { - NodeHash::Hashed(x) => x.0.to_vec(), - NodeHash::Inline(x) => x, - } + val.as_ref().to_vec() } } impl From<&NodeHash> for Vec { fn from(val: &NodeHash) -> Self { - match val { - NodeHash::Hashed(x) => x.0.to_vec(), - NodeHash::Inline(x) => x.clone(), - } + val.as_ref().to_vec() } } @@ -102,16 +125,13 @@ impl Encodable for NodeHash { #[cfg(feature = "libmdbx")] impl Decodable for NodeHash { fn decode(b: &[u8]) -> anyhow::Result { - Ok(match b.len() { - 32 => NodeHash::Hashed(H256::from_slice(b)), - _ => NodeHash::Inline(b.into()), - }) + Ok(NodeHash::from_slice(b)) } } impl Default for NodeHash { fn default() -> Self { - NodeHash::Inline(Vec::new()) + NodeHash::Inline(([0; 31], 0)) } } diff --git a/crates/common/trie/state.rs b/crates/common/trie/state.rs index 124b240071..893e882dc5 100644 --- a/crates/common/trie/state.rs +++ b/crates/common/trie/state.rs @@ -26,8 +26,8 @@ impl TrieState { /// Retrieves a node based on its hash pub fn get_node(&self, hash: NodeHash) -> Result, TrieError> { // Decode the node if it is inlined - if let NodeHash::Inline(encoded) = hash { - return Ok(Some(Node::decode_raw(&encoded)?)); + if let NodeHash::Inline(_) = hash { + return Ok(Some(Node::decode_raw(hash.as_ref())?)); } if let Some(node) = self.cache.get(&hash) { return Ok(Some(node.clone())); diff --git a/crates/common/trie/trie.rs b/crates/common/trie/trie.rs index dbe8e4e83f..9e1ebd8f82 100644 --- a/crates/common/trie/trie.rs +++ b/crates/common/trie/trie.rs @@ -73,7 +73,7 @@ impl Trie { if let Some(root) = &self.root { let root_node = self .state - .get_node(root.clone())? + .get_node(*root)? .ok_or(TrieError::InconsistentTree)?; root_node.get(&self.state, Nibbles::from_bytes(path)) } else { @@ -128,7 +128,7 @@ impl Trie { Ok(self .root .as_ref() - .map(|root| root.clone().finalize()) + .map(|root| root.finalize()) .unwrap_or(*EMPTY_TRIE_HASH)) } @@ -137,7 +137,7 @@ impl Trie { pub fn hash_no_commit(&self) -> H256 { self.root .as_ref() - .map(|root| root.clone().finalize()) + .map(|root| root.finalize()) .unwrap_or(*EMPTY_TRIE_HASH) } @@ -158,10 +158,10 @@ impl Trie { return Ok(node_path); }; // If the root is inlined, add it to the node_path - if let NodeHash::Inline(node) = root { - node_path.push(node.to_vec()); + if let NodeHash::Inline(_) = root { + node_path.push(root.as_ref().to_vec()); } - if let Some(root_node) = self.state.get_node(root.clone())? { + if let Some(root_node) = self.state.get_node(*root)? { root_node.get_path(&self.state, Nibbles::from_bytes(path), &mut node_path)?; } Ok(node_path) @@ -177,7 +177,7 @@ impl Trie { let Some(root_node) = self .root .as_ref() - .map(|root| self.state.get_node(root.clone())) + .map(|root| self.state.get_node(*root)) .transpose()? .flatten() else { @@ -230,7 +230,7 @@ impl Trie { } trie.root .as_ref() - .map(|root| root.clone().finalize()) + .map(|root| root.finalize()) .unwrap_or(*EMPTY_TRIE_HASH) } @@ -274,7 +274,7 @@ impl Trie { let Some(root_node) = self .root .as_ref() - .map(|root| self.state.get_node(root.clone())) + .map(|root| self.state.get_node(*root)) .transpose()? .flatten() else { @@ -295,7 +295,7 @@ impl Trie { if child_hash.is_valid() { let child_node = self .state - .get_node(child_hash.clone())? + .get_node(*child_hash)? .ok_or(TrieError::InconsistentTree)?; self.get_node_inner(child_node, partial_path) } else { @@ -310,7 +310,7 @@ impl Trie { { let child_node = self .state - .get_node(extension_node.child.clone())? + .get_node(extension_node.child)? .ok_or(TrieError::InconsistentTree)?; self.get_node_inner(child_node, partial_path) } else { diff --git a/crates/common/trie/trie_iter.rs b/crates/common/trie/trie_iter.rs index 5b90ddd13c..5526b62b2b 100644 --- a/crates/common/trie/trie_iter.rs +++ b/crates/common/trie/trie_iter.rs @@ -9,7 +9,7 @@ pub struct TrieIterator { impl TrieIterator { pub(crate) fn new(trie: Trie) -> Self { let stack = if let Some(root) = &trie.root { - vec![(Nibbles::default(), root.clone())] + vec![(Nibbles::default(), *root)] } else { vec![] }; @@ -34,7 +34,7 @@ impl Iterator for TrieIterator { if child.is_valid() { let mut child_path = path.clone(); child_path.append(choice as u8); - self.stack.push((child_path, child.clone())) + self.stack.push((child_path, *child)) } } } @@ -42,8 +42,7 @@ impl Iterator for TrieIterator { // Update path path.extend(&extension_node.prefix); // Add child to the stack - self.stack - .push((path.clone(), extension_node.child.clone())); + self.stack.push((path.clone(), extension_node.child)); } Node::Leaf(leaf) => { path.extend(&leaf.partial); diff --git a/crates/common/trie/verify_range.rs b/crates/common/trie/verify_range.rs index 51ae1978a5..358bdf422e 100644 --- a/crates/common/trie/verify_range.rs +++ b/crates/common/trie/verify_range.rs @@ -153,7 +153,7 @@ fn fill_node( let node = proof_nodes.get_node(node_hash)?; let child_hash = get_child(path, &node); if let Some(ref child_hash) = child_hash { - trie_state.insert_node(node, node_hash.clone()); + trie_state.insert_node(node, *node_hash); fill_node(path, child_hash, trie_state, proof_nodes) } else { let value = match &node { @@ -163,7 +163,7 @@ fn fill_node( .then_some(n.value.clone()) .unwrap_or_default(), }; - trie_state.insert_node(node, node_hash.clone()); + trie_state.insert_node(node, *node_hash); Ok(value) } } @@ -174,12 +174,12 @@ fn get_child<'a>(path: &'a mut Nibbles, node: &'a Node) -> Option { Node::Branch(n) => { if let Some(choice) = path.next_choice() { if n.choices[choice].is_valid() { - return Some(n.choices[choice].clone()); + return Some(n.choices[choice]); } } None } - Node::Extension(n) => path.skip_prefix(&n.prefix).then_some(n.child.clone()), + Node::Extension(n) => path.skip_prefix(&n.prefix).then_some(n.child), Node::Leaf(_) => None, } } @@ -212,7 +212,7 @@ fn has_right_element_inner( if n.choices[choice + 1..].iter().any(|child| child.is_valid()) { return Ok(true); } else if n.choices[choice].is_valid() { - return has_right_element_inner(n.choices[choice].clone(), path, trie_state); + return has_right_element_inner(n.choices[choice], path, trie_state); } } } @@ -261,7 +261,7 @@ fn remove_internal_references_inner( return Ok(true); } // We already looked up the nodes when filling the state so this shouldn't fail - let node = trie_state.get_node(node_hash.clone())?.unwrap(); + let node = trie_state.get_node(node_hash)?.unwrap(); match node { Node::Branch(mut n) => { // If none of the paths have a next choice nibble then it means that this is the end of the path @@ -276,7 +276,7 @@ fn remove_internal_references_inner( // Keep going // Check if the child extension node should be removed as a result of this process let should_remove = remove_internal_references_inner( - n.choices[left_choice].clone(), + n.choices[left_choice], left_path, right_path, trie_state, @@ -295,13 +295,9 @@ fn remove_internal_references_inner( } // Remove nodes on the left and right choice's subtries let should_remove_left = - remove_node(n.choices[left_choice].clone(), left_path, false, trie_state); - let should_remove_right = remove_node( - n.choices[right_choice].clone(), - right_path, - true, - trie_state, - ); + remove_node(n.choices[left_choice], left_path, false, trie_state); + let should_remove_right = + remove_node(n.choices[right_choice], right_path, true, trie_state); // Remove left and right child nodes if their subtries where wiped in the process if should_remove_left { n.choices[left_choice] = NodeHash::default(); @@ -373,7 +369,7 @@ fn remove_node( return false; } // We already looked up the nodes when filling the state so this shouldn't fail - let node = trie_state.get_node(node_hash.clone()).unwrap().unwrap(); + let node = trie_state.get_node(node_hash).unwrap().unwrap(); match node { Node::Branch(mut n) => { // Remove child nodes @@ -391,8 +387,7 @@ fn remove_node( } } // Remove nodes to the left/right of the choice's subtrie - let should_remove = - remove_node(n.choices[choice].clone(), path, remove_left, trie_state); + let should_remove = remove_node(n.choices[choice], path, remove_left, trie_state); if should_remove { n.choices[choice] = NodeHash::default(); } @@ -440,10 +435,10 @@ impl<'a> ProofNodeStorage<'a> { let Some(encoded) = self.nodes.get(hash.as_bytes()) else { return Err(TrieError::Verify(format!("proof node missing: {hash}"))); }; - *encoded + encoded.as_slice() } - NodeHash::Inline(ref encoded) => encoded, + NodeHash::Inline(_) => hash.as_ref(), }; Ok(Node::decode_raw(encoded)?) } diff --git a/crates/networking/p2p/sync.rs b/crates/networking/p2p/sync.rs index 4df3217064..d3ae9a2798 100644 --- a/crates/networking/p2p/sync.rs +++ b/crates/networking/p2p/sync.rs @@ -677,13 +677,13 @@ fn node_missing_children( match &node { Node::Branch(node) => { for (index, child) in node.choices.iter().enumerate() { - if child.is_valid() && trie_state.get_node(child.clone())?.is_none() { + if child.is_valid() && trie_state.get_node(*child)?.is_none() { paths.push(parent_path.append_new(index as u8)); } } } Node::Extension(node) => { - if node.child.is_valid() && trie_state.get_node(node.child.clone())?.is_none() { + if node.child.is_valid() && trie_state.get_node(node.child)?.is_none() { paths.push(parent_path.concat(node.prefix.clone())); } } From c536e41c4ad46493c418556e78ff75efabe47ee8 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Wed, 23 Apr 2025 12:16:54 +0200 Subject: [PATCH 4/6] perf(core): make redb TriuDb impl use NodeHash as keys too --- crates/storage/trie_db/redb.rs | 14 +++++++------- crates/storage/trie_db/redb_multitable.rs | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/storage/trie_db/redb.rs b/crates/storage/trie_db/redb.rs index abe086adc9..123e088138 100644 --- a/crates/storage/trie_db/redb.rs +++ b/crates/storage/trie_db/redb.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use ethrex_trie::{TrieDB, TrieError}; +use ethrex_trie::{NodeHash, TrieDB, TrieError}; use redb::{Database, TableDefinition}; const TABLE: TableDefinition<&[u8], &[u8]> = TableDefinition::new("Trie"); @@ -16,7 +16,7 @@ impl RedBTrie { } impl TrieDB for RedBTrie { - fn get(&self, key: Vec) -> Result>, TrieError> { + fn get(&self, key: NodeHash) -> Result>, TrieError> { let read_txn = self .db .begin_read() @@ -25,12 +25,12 @@ impl TrieDB for RedBTrie { .open_table(TABLE) .map_err(|e| TrieError::DbError(e.into()))?; Ok(table - .get(&*key) + .get(key.as_ref()) .map_err(|e| TrieError::DbError(e.into()))? .map(|value| value.value().to_vec())) } - fn put(&self, key: Vec, value: Vec) -> Result<(), TrieError> { + fn put(&self, key: NodeHash, value: Vec) -> Result<(), TrieError> { let write_txn = self .db .begin_write() @@ -40,7 +40,7 @@ impl TrieDB for RedBTrie { .open_table(TABLE) .map_err(|e| TrieError::DbError(e.into()))?; table - .insert(&*key, &*value) + .insert(key.as_ref(), &*value) .map_err(|e| TrieError::DbError(e.into()))?; } write_txn @@ -50,7 +50,7 @@ impl TrieDB for RedBTrie { Ok(()) } - fn put_batch(&self, key_values: Vec<(Vec, Vec)>) -> Result<(), TrieError> { + fn put_batch(&self, key_values: Vec<(NodeHash, Vec)>) -> Result<(), TrieError> { let write_txn = self .db .begin_write() @@ -61,7 +61,7 @@ impl TrieDB for RedBTrie { .map_err(|e| TrieError::DbError(e.into()))?; for (key, value) in key_values { table - .insert(&*key, &*value) + .insert(key.as_ref(), &*value) .map_err(|e| TrieError::DbError(e.into()))?; } } diff --git a/crates/storage/trie_db/redb_multitable.rs b/crates/storage/trie_db/redb_multitable.rs index 736edeb880..606766aa5a 100644 --- a/crates/storage/trie_db/redb_multitable.rs +++ b/crates/storage/trie_db/redb_multitable.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use redb::{Database, MultimapTableDefinition}; -use ethrex_trie::{TrieDB, TrieError}; +use ethrex_trie::{NodeHash, TrieDB, TrieError}; use super::utils::node_hash_to_fixed_size; @@ -24,7 +24,7 @@ impl RedBMultiTableTrieDB { } impl TrieDB for RedBMultiTableTrieDB { - fn get(&self, key: Vec) -> Result>, TrieError> { + fn get(&self, key: NodeHash) -> Result>, TrieError> { let read_txn = self .db .begin_read() @@ -56,7 +56,7 @@ impl TrieDB for RedBMultiTableTrieDB { } } - fn put(&self, key: Vec, value: Vec) -> Result<(), TrieError> { + fn put(&self, key: NodeHash, value: Vec) -> Result<(), TrieError> { let write_txn = self .db .begin_write() @@ -76,7 +76,7 @@ impl TrieDB for RedBMultiTableTrieDB { Ok(()) } - fn put_batch(&self, key_values: Vec<(Vec, Vec)>) -> Result<(), TrieError> { + fn put_batch(&self, key_values: Vec<(NodeHash, Vec)>) -> Result<(), TrieError> { let write_txn = self .db .begin_write() From 21c67f45b036f379632b17cfa27c6be4de198038 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Wed, 23 Apr 2025 12:21:42 +0200 Subject: [PATCH 5/6] changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4290c7f9b..edad9f4f86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Perf +### 2025-04-23 + +- Make TrieDb trait use NodeHash as key [2517](https://github.com/lambdaclass/ethrex/pull/2517) + ### 2025-04-22 - Transform the inlined variant of NodeHash to a constant sized array [2516](https://github.com/lambdaclass/ethrex/pull/2516) From e92b11d22b1baac38bac154d6ecaec8de6862eee Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Wed, 23 Apr 2025 12:23:59 +0200 Subject: [PATCH 6/6] change nodehash libmdbx test a bit --- crates/storage/trie_db/libmdbx.rs | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/crates/storage/trie_db/libmdbx.rs b/crates/storage/trie_db/libmdbx.rs index 42bd5fcf1f..f437e8039b 100644 --- a/crates/storage/trie_db/libmdbx.rs +++ b/crates/storage/trie_db/libmdbx.rs @@ -59,14 +59,6 @@ mod test { use std::sync::Arc; use tempdir::TempDir; - pub const HELLO_NODEHASH: NodeHash = NodeHash::Inline(( - [ - 104, 101, 108, 108, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, - ], - 2, - )); - #[test] fn simple_addition() { table!( @@ -74,10 +66,11 @@ mod test { ( Nodes ) NodeHash => Vec ); let inner_db = new_db::(); + let key = NodeHash::from_encoded_raw(b"hello"); let db = LibmdbxTrieDB::::new(inner_db); - assert_eq!(db.get(HELLO_NODEHASH).unwrap(), None); - db.put(HELLO_NODEHASH, "value".into()).unwrap(); - assert_eq!(db.get(HELLO_NODEHASH).unwrap(), Some("value".into())); + assert_eq!(db.get(key).unwrap(), None); + db.put(key, "value".into()).unwrap(); + assert_eq!(db.get(key).unwrap(), Some("value".into())); } #[test] @@ -97,8 +90,9 @@ mod test { let inner_db = Arc::new(Database::create(None, &tables).unwrap()); let db_a = LibmdbxTrieDB::::new(inner_db.clone()); let db_b = LibmdbxTrieDB::::new(inner_db.clone()); - db_a.put(HELLO_NODEHASH, "value".into()).unwrap(); - assert_eq!(db_b.get(HELLO_NODEHASH).unwrap(), None); + let key = NodeHash::from_encoded_raw(b"hello"); + db_a.put(key, "value".into()).unwrap(); + assert_eq!(db_b.get(key).unwrap(), None); } #[test]