Skip to content

perf(core): make TrieDb use NodeHash as key #2517

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## Perf

### 2025-04-28

- Make TrieDB trait use NodeHash as key [2517](https://github.com/lambdaclass/ethrex/pull/2517)

### 2025-04-22

- Avoid calculating state transitions after every block in bulk mode [2519](https://github.com/lambdaclass/ethrex/pull/2519)
Expand Down
18 changes: 9 additions & 9 deletions crates/common/trie/db.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
use crate::error::TrieError;
use crate::{error::TrieError, NodeHash};
use std::{
collections::HashMap,
sync::{Arc, Mutex},
};

/// Key-value storage interface the trie uses for its encoded nodes.
///
/// Implementors must be `Send + Sync`. `get` returns the bytes stored under a
/// key (or `None`), `put` stores a single pair and `put_batch` stores a list of
/// pairs; every operation reports failures as a `TrieError`.
pub trait TrieDB: Send + Sync {
fn get(&self, key: Vec<u8>) -> Result<Option<Vec<u8>>, TrieError>;
fn put(&self, key: Vec<u8>, value: Vec<u8>) -> Result<(), TrieError>;
fn get(&self, key: NodeHash) -> Result<Option<Vec<u8>>, TrieError>;
fn put(&self, key: NodeHash, value: Vec<u8>) -> Result<(), TrieError>;
// fn put_batch(&self, key: Vec<u8>, value: Vec<u8>) -> Result<(), TrieError>;
fn put_batch(&self, key_values: Vec<(Vec<u8>, Vec<u8>)>) -> Result<(), TrieError>;
fn put_batch(&self, key_values: Vec<(NodeHash, Vec<u8>)>) -> Result<(), TrieError>;
}

/// In-memory implementation of the `TrieDB` trait, with get, put and put_batch operations.
///
/// All entries live in a `HashMap` guarded by a `Mutex`; a lock that cannot be
/// acquired is reported as `TrieError::LockError`.
pub struct InMemoryTrieDB {
// The `Arc` lets the map be created elsewhere and handed in through `new`.
inner: Arc<Mutex<HashMap<Vec<u8>, Vec<u8>>>>,
inner: Arc<Mutex<HashMap<NodeHash, Vec<u8>>>>,
}

impl InMemoryTrieDB {
pub const fn new(map: Arc<Mutex<HashMap<Vec<u8>, Vec<u8>>>>) -> Self {
pub const fn new(map: Arc<Mutex<HashMap<NodeHash, Vec<u8>>>>) -> Self {
Self { inner: map }
}
pub fn new_empty() -> Self {
Expand All @@ -28,7 +28,7 @@ impl InMemoryTrieDB {
}

impl TrieDB for InMemoryTrieDB {
fn get(&self, key: Vec<u8>) -> Result<Option<Vec<u8>>, TrieError> {
fn get(&self, key: NodeHash) -> Result<Option<Vec<u8>>, TrieError> {
Ok(self
.inner
.lock()
Expand All @@ -37,15 +37,15 @@ impl TrieDB for InMemoryTrieDB {
.cloned())
}

fn put(&self, key: Vec<u8>, value: Vec<u8>) -> Result<(), TrieError> {
fn put(&self, key: NodeHash, value: Vec<u8>) -> Result<(), TrieError> {
self.inner
.lock()
.map_err(|_| TrieError::LockError)?
.insert(key, value);
Ok(())
}

fn put_batch(&self, key_values: Vec<(Vec<u8>, Vec<u8>)>) -> Result<(), TrieError> {
fn put_batch(&self, key_values: Vec<(NodeHash, Vec<u8>)>) -> Result<(), TrieError> {
let mut db = self.inner.lock().map_err(|_| TrieError::LockError)?;

for (key, value) in key_values {
Expand Down
14 changes: 14 additions & 0 deletions crates/common/trie/node_hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,20 @@ impl NodeHash {
}
encoder
}

/// Returns how many bytes this node hash occupies.
///
/// For `Hashed` this is the byte length of the wrapped hash. For `Inline` it is
/// the length recorded in the second tuple field (presumably the number of
/// encoded bytes kept inline; confirm against the `Inline` definition).
pub fn len(&self) -> usize {
    match self {
        NodeHash::Inline(inline) => inline.1 as usize,
        NodeHash::Hashed(digest) => {
            let bytes = digest.as_bytes();
            bytes.len()
        }
    }
}

/// Reports whether this node hash holds zero bytes.
///
/// An `Inline` value is empty when its recorded length (second tuple field) is
/// zero; a `Hashed` value defers to the emptiness of the wrapped hash's bytes.
pub fn is_empty(&self) -> bool {
    match self {
        NodeHash::Inline(inline) => {
            let stored_len = inline.1;
            stored_len == 0
        }
        NodeHash::Hashed(digest) => digest.as_bytes().is_empty(),
    }
}
}

impl From<Vec<u8>> for NodeHash {
Expand Down
10 changes: 5 additions & 5 deletions crates/common/trie/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ impl TrieState {
return Ok(Some(node.clone()));
};
self.db
.get(hash.into())?
.get(hash)?
.map(|rlp| Node::decode(&rlp).map_err(TrieError::RLPDecode))
.transpose()
}
Expand Down Expand Up @@ -68,7 +68,7 @@ impl TrieState {
fn commit_node_tail_recursive(
&mut self,
node_hash: &NodeHash,
acc: &mut Vec<(Vec<u8>, Vec<u8>)>,
acc: &mut Vec<(NodeHash, Vec<u8>)>,
) -> Result<(), TrieError> {
let Some(node) = self.cache.remove(node_hash) else {
// If the node is not in the cache then it means it is already stored in the DB
Expand All @@ -87,7 +87,7 @@ impl TrieState {
Node::Leaf(_) => {}
}
// Commit self
acc.push((node_hash.into(), node.encode_to_vec()));
acc.push((*node_hash, node.encode_to_vec()));

Ok(())
}
Expand All @@ -96,7 +96,7 @@ impl TrieState {
/// Stores `node`, encoded with `encode_to_vec`, in the DB under `hash`.
///
/// Nothing is written unless `hash` is `NodeHash::Hashed`. An error returned by
/// the DB `put` is propagated to the caller.
pub fn write_node(&mut self, node: Node, hash: NodeHash) -> Result<(), TrieError> {
// Don't insert the node if it is already inlined on the parent
if matches!(hash, NodeHash::Hashed(_)) {
self.db.put(hash.into(), node.encode_to_vec())?;
self.db.put(hash, node.encode_to_vec())?;
}
Ok(())
}
Expand All @@ -108,7 +108,7 @@ impl TrieState {
.iter()
.filter_map(|node| {
let hash = node.compute_hash();
matches!(hash, NodeHash::Hashed(_)).then(|| (hash.into(), node.encode_to_vec()))
matches!(hash, NodeHash::Hashed(_)).then(|| (hash, node.encode_to_vec()))
})
.collect();
self.db.put_batch(key_values)?;
Expand Down
11 changes: 5 additions & 6 deletions crates/common/trie/trie.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,13 @@ mod trie_iter;
mod verify_range;
use ethereum_types::H256;
use ethrex_rlp::constants::RLP_NULL;
use node_hash::NodeHash;
use sha3::{Digest, Keccak256};
use std::collections::HashSet;

pub use self::db::{InMemoryTrieDB, TrieDB};
pub use self::nibbles::Nibbles;
pub use self::verify_range::verify_range;
pub use self::{node::Node, state::TrieState};
pub use self::{node::Node, node_hash::NodeHash, state::TrieState};

pub use self::error::TrieError;
use self::{node::LeafNode, trie_iter::TrieIterator};
Expand Down Expand Up @@ -241,15 +240,15 @@ impl Trie {
/// A `TrieDB` that stores nothing: lookups always miss and writes are discarded.
struct NullTrieDB;

impl TrieDB for NullTrieDB {
fn get(&self, _key: Vec<u8>) -> Result<Option<Vec<u8>>, TrieError> {
fn get(&self, _key: NodeHash) -> Result<Option<Vec<u8>>, TrieError> {
// No entry is ever kept, so every key is reported as absent.
Ok(None)
}

fn put(&self, _key: Vec<u8>, _value: Vec<u8>) -> Result<(), TrieError> {
fn put(&self, _key: NodeHash, _value: Vec<u8>) -> Result<(), TrieError> {
// The pair is ignored; the call still succeeds.
Ok(())
}

fn put_batch(&self, _key_values: Vec<(Vec<u8>, Vec<u8>)>) -> Result<(), TrieError> {
fn put_batch(&self, _key_values: Vec<(NodeHash, Vec<u8>)>) -> Result<(), TrieError> {
// The whole batch is ignored; the call still succeeds.
Ok(())
}
}
Expand Down Expand Up @@ -340,7 +339,7 @@ impl Trie {
use std::sync::Arc;
use std::sync::Mutex;

let hmap: HashMap<Vec<u8>, Vec<u8>> = HashMap::new();
let hmap: HashMap<NodeHash, Vec<u8>> = HashMap::new();
let map = Arc::new(Mutex::new(hmap));
let db = InMemoryTrieDB::new(map);
Trie::new(Box::new(db))
Expand Down
4 changes: 2 additions & 2 deletions crates/storage/store_db/in_memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ use ethrex_common::types::{
payload::PayloadBundle, AccountState, Block, BlockBody, BlockHash, BlockHeader, BlockNumber,
ChainConfig, Index, Receipt,
};
use ethrex_trie::{InMemoryTrieDB, Nibbles, Trie};
use ethrex_trie::{InMemoryTrieDB, Nibbles, NodeHash, Trie};
use std::{
collections::{BTreeMap, HashMap},
fmt::Debug,
sync::{Arc, Mutex, MutexGuard},
};

/// Shared, mutex-guarded hash map used as in-memory trie node storage
/// (the same map type that `InMemoryTrieDB::new` takes).
pub type NodeMap = Arc<Mutex<HashMap<Vec<u8>, Vec<u8>>>>;
pub type NodeMap = Arc<Mutex<HashMap<NodeHash, Vec<u8>>>>;

#[derive(Default, Clone)]
pub struct Store(Arc<Mutex<StoreInner>>);
Expand Down
4 changes: 2 additions & 2 deletions crates/storage/store_db/libmdbx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use ethrex_common::types::{
use ethrex_rlp::decode::RLPDecode;
use ethrex_rlp::encode::RLPEncode;
use ethrex_rlp::error::RLPDecodeError;
use ethrex_trie::{Nibbles, Trie};
use ethrex_trie::{Nibbles, NodeHash, Trie};
use libmdbx::orm::{Decodable, DupSort, Encodable, Table};
use libmdbx::{
dupsort,
Expand Down Expand Up @@ -1150,7 +1150,7 @@ table!(

table!(
/// state trie nodes
( StateTrieNodes ) Vec<u8> => Vec<u8>
( StateTrieNodes ) NodeHash => Vec<u8>
);

// Local Blocks
Expand Down
31 changes: 17 additions & 14 deletions crates/storage/trie_db/libmdbx.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use ethrex_trie::error::TrieError;
use ethrex_trie::{error::TrieError, NodeHash};
use libmdbx::orm::{Database, Table};
use std::{marker::PhantomData, sync::Arc};
/// Libmdbx implementation for the TrieDB trait, with get and put operations.
Expand All @@ -11,7 +11,7 @@ use ethrex_trie::TrieDB;

impl<T> LibmdbxTrieDB<T>
where
T: Table<Key = Vec<u8>, Value = Vec<u8>>,
T: Table<Key = NodeHash, Value = Vec<u8>>,
{
pub fn new(db: Arc<Database>) -> Self {
Self {
Expand All @@ -23,20 +23,20 @@ where

impl<T> TrieDB for LibmdbxTrieDB<T>
where
T: Table<Key = Vec<u8>, Value = Vec<u8>>,
T: Table<Key = NodeHash, Value = Vec<u8>>,
{
fn get(&self, key: Vec<u8>) -> Result<Option<Vec<u8>>, TrieError> {
fn get(&self, key: NodeHash) -> Result<Option<Vec<u8>>, TrieError> {
let txn = self.db.begin_read().map_err(TrieError::DbError)?;
txn.get::<T>(key).map_err(TrieError::DbError)
}

fn put(&self, key: Vec<u8>, value: Vec<u8>) -> Result<(), TrieError> {
fn put(&self, key: NodeHash, value: Vec<u8>) -> Result<(), TrieError> {
let txn = self.db.begin_readwrite().map_err(TrieError::DbError)?;
txn.upsert::<T>(key, value).map_err(TrieError::DbError)?;
txn.commit().map_err(TrieError::DbError)
}

fn put_batch(&self, key_values: Vec<(Vec<u8>, Vec<u8>)>) -> Result<(), TrieError> {
fn put_batch(&self, key_values: Vec<(NodeHash, Vec<u8>)>) -> Result<(), TrieError> {
let txn = self.db.begin_readwrite().map_err(TrieError::DbError)?;
for (key, value) in key_values {
txn.upsert::<T>(key, value).map_err(TrieError::DbError)?;
Expand All @@ -49,6 +49,7 @@ where
mod test {
use super::LibmdbxTrieDB;
use crate::trie_db::test_utils::libmdbx::{new_db, TestNodes};
use ethrex_trie::NodeHash;
use ethrex_trie::Trie;
use ethrex_trie::TrieDB;
use libmdbx::{
Expand All @@ -62,24 +63,25 @@ mod test {
fn simple_addition() {
// Round trip on a single table: the key is absent before `put`, and `get`
// returns the stored value afterwards.
table!(
/// NodeHash to Node table
( Nodes ) Vec<u8> => Vec<u8>
( Nodes ) NodeHash => Vec<u8>
);
let inner_db = new_db::<Nodes>();
let key = NodeHash::from_encoded_raw(b"hello");
let db = LibmdbxTrieDB::<Nodes>::new(inner_db);
assert_eq!(db.get("hello".into()).unwrap(), None);
db.put("hello".into(), "value".into()).unwrap();
assert_eq!(db.get("hello".into()).unwrap(), Some("value".into()));
assert_eq!(db.get(key).unwrap(), None);
db.put(key, "value".into()).unwrap();
assert_eq!(db.get(key).unwrap(), Some("value".into()));
}

#[test]
fn different_tables() {
table!(
/// NodeHash to vec
( TableA ) Vec<u8> => Vec<u8>
( TableA ) NodeHash => Vec<u8>
);
table!(
/// NodeHash to vec
( TableB ) Vec<u8> => Vec<u8>
( TableB ) NodeHash => Vec<u8>
);
let tables = [table_info!(TableA), table_info!(TableB)]
.into_iter()
Expand All @@ -88,8 +90,9 @@ mod test {
let inner_db = Arc::new(Database::create(None, &tables).unwrap());
let db_a = LibmdbxTrieDB::<TableA>::new(inner_db.clone());
let db_b = LibmdbxTrieDB::<TableB>::new(inner_db.clone());
db_a.put("hello".into(), "value".into()).unwrap();
assert_eq!(db_b.get("hello".into()).unwrap(), None);
let key = NodeHash::from_encoded_raw(b"hello");
db_a.put(key, "value".into()).unwrap();
assert_eq!(db_b.get(key).unwrap(), None);
}

#[test]
Expand Down
24 changes: 13 additions & 11 deletions crates/storage/trie_db/libmdbx_dupsort.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::{marker::PhantomData, sync::Arc};

use super::utils::node_hash_to_fixed_size;
use ethrex_trie::error::TrieError;
use ethrex_trie::TrieDB;
use ethrex_trie::{error::TrieError, NodeHash};
use libmdbx::orm::{Database, DupSort, Encodable};

/// Libmdbx implementation for the TrieDB trait for a dupsort table with a fixed primary key.
Expand Down Expand Up @@ -37,13 +37,13 @@ where
T: DupSort<Key = (SK, [u8; 33]), SeekKey = SK, Value = Vec<u8>>,
SK: Clone + Encodable,
{
fn get(&self, key: Vec<u8>) -> Result<Option<Vec<u8>>, TrieError> {
fn get(&self, key: NodeHash) -> Result<Option<Vec<u8>>, TrieError> {
let txn = self.db.begin_read().map_err(TrieError::DbError)?;
txn.get::<T>((self.fixed_key.clone(), node_hash_to_fixed_size(key)))
.map_err(TrieError::DbError)
}

fn put(&self, key: Vec<u8>, value: Vec<u8>) -> Result<(), TrieError> {
fn put(&self, key: NodeHash, value: Vec<u8>) -> Result<(), TrieError> {
let txn = self.db.begin_readwrite().map_err(TrieError::DbError)?;
txn.upsert::<T>(
(self.fixed_key.clone(), node_hash_to_fixed_size(key)),
Expand All @@ -53,7 +53,7 @@ where
txn.commit().map_err(TrieError::DbError)
}

fn put_batch(&self, key_values: Vec<(Vec<u8>, Vec<u8>)>) -> Result<(), TrieError> {
fn put_batch(&self, key_values: Vec<(NodeHash, Vec<u8>)>) -> Result<(), TrieError> {
let txn = self.db.begin_readwrite().map_err(TrieError::DbError)?;
for (key, value) in key_values {
txn.upsert::<T>(
Expand Down Expand Up @@ -82,19 +82,21 @@ mod test {
fn simple_addition() {
// Round trip through a dupsort-backed DB: the key is absent before `put`,
// and `get` returns the stored value afterwards.
let inner_db = new_db::<Nodes>();
// `[5; 32]` is the fixed key handed to the DB constructor.
let db = LibmdbxDupsortTrieDB::<Nodes, [u8; 32]>::new(inner_db, [5; 32]);
assert_eq!(db.get("hello".into()).unwrap(), None);
db.put("hello".into(), "value".into()).unwrap();
assert_eq!(db.get("hello".into()).unwrap(), Some("value".into()));
let key = NodeHash::from_encoded_raw(b"hello");
assert_eq!(db.get(key).unwrap(), None);
db.put(key, "value".into()).unwrap();
assert_eq!(db.get(key).unwrap(), Some("value".into()));
}

/// Two DB handles over the same database, built with different fixed keys
/// (`[5; 32]` and `[7; 32]`), must each return their own value for the same
/// node key.
#[test]
fn different_keys() {
let inner_db = new_db::<Nodes>();
let db_a = LibmdbxDupsortTrieDB::<Nodes, [u8; 32]>::new(inner_db.clone(), [5; 32]);
let db_b = LibmdbxDupsortTrieDB::<Nodes, [u8; 32]>::new(inner_db, [7; 32]);
db_a.put("hello".into(), "hello!".into()).unwrap();
db_b.put("hello".into(), "go away!".into()).unwrap();
assert_eq!(db_a.get("hello".into()).unwrap(), Some("hello!".into()));
assert_eq!(db_b.get("hello".into()).unwrap(), Some("go away!".into()));
let key = NodeHash::from_encoded_raw(b"hello");
db_a.put(key, "hello!".into()).unwrap();
db_b.put(key, "go away!".into()).unwrap();
assert_eq!(db_a.get(key).unwrap(), Some("hello!".into()));
assert_eq!(db_b.get(key).unwrap(), Some("go away!".into()));
}
}
Loading