From 22f4af5453cb4e0f4c4ad75c0eb5420fd89d701c Mon Sep 17 00:00:00 2001 From: Brannon Dorsey Date: Mon, 12 Mar 2018 17:03:50 -0500 Subject: [PATCH 1/4] Add LevelDB caching to Blockchain.get_ordered_blocks(...) using pickle https://github.com/alecalve/python-bitcoin-blockchain-parser/issues/39. Update README. --- README.md | 40 +++++++++++++++++++++++++++------ blockchain_parser/blockchain.py | 19 ++++++++++++++-- 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 8e76a78..c57701b 100644 --- a/README.md +++ b/README.md @@ -5,34 +5,60 @@ This Python 3 library provides a parser for the raw data stored by bitcoind. - Detects outputs types - Detects addresses in outputs - Interprets scripts +- Supports SegWit +- Supports ordered block parsing ## Examples +Below are two basic examples for parsing the blockchain. More examples are available in the examples directory. + +### Unordered Blocks + +This blockchain parser parses raw blocks saved in Bitcoin Core's `.blk` file format. Bitcoin Core does not guarantee that these blocks are saved in order. If your application does not require that blocks are parsed in order, the `Blockchain.get_unordered_blocks(...)` method can be used: + ```python -import sys +import os from blockchain_parser.blockchain import Blockchain # Instantiate the Blockchain by giving the path to the directory # containing the .blk files created by bitcoind -blockchain = Blockchain(sys.argv[1]) +blockchain = Blockchain(os.path.expanduser('~/.bitcoin/blocks')) for block in blockchain.get_unordered_blocks(): for tx in block.transactions: for no, output in enumerate(tx.outputs): print("tx=%s outputno=%d type=%s value=%s" % (tx.hash, no, output.type, output.value)) +``` + +### Ordered Blocks + +If maintaining block order is necessary for your application, you should use the `Blockchain.get_ordered_blocks(...)` method. This method uses Bitcoin Core's LevelDB index to locate ordered block data in its `.blk` files. 
+ +```python +import os +from blockchain_parser.blockchain import Blockchain # To get the blocks ordered by height, you need to provide the path of the # `index` directory (LevelDB index) being maintained by bitcoind. It contains # .ldb files and is present inside the `blocks` directory. -for block in blockchain.get_ordered_blocks(sys.argv[1] + '/index', end=1000): +for block in blockchain.get_ordered_blocks(os.path.expanduser('~/.bitcoin/blocks/index'), end=1000): print("height=%d block=%s" % (block.height, block.hash)) +``` + +Blocks can be iterated in reverse by specifying a start parameter that is greater than the end parameter. + +```python +for block in blockchain.get_ordered_blocks(os.path.expanduser('~/.bitcoin/blocks/index'), start=510000, end=0): + print("height=%d block=%s" % (block.height, block.hash)) +``` -# Blocks can be iterated in reverse by specifying a start parameter that is -# greater than the end parameter. -for block in blockchain.get_ordered_blocks(sys.argv[1] + '/index', start=510000, end=0): +Building the LevelDB index can take a while, which can make iterative development and debugging challenging. For this reason, `Blockchain.get_ordered_blocks(...)` supports caching the LevelDB index database using [pickle](https://docs.python.org/3.6/library/pickle.html). To use a cache, simply pass `cache=filename` to the ordered blocks method. If the cached file does not exist, it will be created for faster parsing the next time the method is run. If the cached file already exists, it will be used instead of re-parsing the LevelDB database. + +```python +for block in blockchain.get_ordered_blocks(os.path.expanduser('~/.bitcoin/blocks/index'), cache='index-cache.pickle'): print("height=%d block=%s" % (block.height, block.hash)) ``` -More examples are available in the examples directory. +**NOTE**: You must manually/programmatically delete the cache file in order to rebuild the cache. 
Do this whenever you want to re-parse a blockchain that has grown since the cache file was saved, because blocks added after that point are not included in the cache. Only use cache files that you created yourself: loading a pickle file from an untrusted source can execute arbitrary code. ## Installing diff --git a/blockchain_parser/blockchain.py b/blockchain_parser/blockchain.py index 0a774cf..83336d3 100644 --- a/blockchain_parser/blockchain.py +++ b/blockchain_parser/blockchain.py @@ -12,6 +12,7 @@ import os import mmap import struct +import pickle import stat import plyvel @@ -145,12 +146,26 @@ def _index_confirmed(self, chain_indexes, num_confirmations=6): if first_block.hash in chain: return True else: return False - def get_ordered_blocks(self, index, start=0, end=None): + def get_ordered_blocks(self, index, start=0, end=None, cache=None): """Yields the blocks contained in the .blk files as per the heigt extract from the leveldb index present at path index maintained by bitcoind. """ - blockIndexes = self.__getBlockIndexes(index) + + blockIndexes = None + + if cache and os.path.exists(cache): + # load the block index cache from a previous index + with open(cache, 'rb') as f: + blockIndexes = pickle.load(f) + + if blockIndexes == None: + # build the block index + blockIndexes = self.__getBlockIndexes(index) + if cache and not os.path.exists(cache): + # cache the block index for re-use next time + with open(cache, 'wb') as f: + pickle.dump(blockIndexes, f) # remove small forks that may have occured while the node was running live. 
# Occassionally a node will receive two different solutions to the next block From 2342865da4dbac94ae99771475d716c9e270129a Mon Sep 17 00:00:00 2001 From: Brannon Dorsey Date: Thu, 15 Mar 2018 14:37:31 -0500 Subject: [PATCH 2/4] Fix PEP formatting warnings --- blockchain_parser/blockchain.py | 52 ++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/blockchain_parser/blockchain.py b/blockchain_parser/blockchain.py index 83336d3..131f006 100644 --- a/blockchain_parser/blockchain.py +++ b/blockchain_parser/blockchain.py @@ -64,10 +64,11 @@ def get_blocks(blockfile): offset += 1 raw_data.close() + def get_block(blockfile, offset): """Extracts a single block from the blockfile at the given offset""" with open(blockfile, "rb") as f: - f.seek(offset - 4) # Size is present 4 bytes before the db offset + f.seek(offset - 4) # Size is present 4 bytes before the db offset size, = struct.unpack(" -1: - # if this block is the same height as the last block an orphan has + # if this block is the same height as the last block an orphan # occurred, now we have to figure out which of the two to keep if blockIdx.height == last_height: - # loop through future blocks until we find a chain of at least - # six blocks that includes this block. If we can't find one - # remove this block as it is invalid + # loop through future blocks until we find a chain 6 blocks + # long that includes this block. 
If we can't find one remove + # this block as it is invalid if self._index_confirmed(blockIndexes[i:]): # if this block is confirmed, the unconfirmed block is @@ -199,7 +202,8 @@ def get_ordered_blocks(self, index, start=0, end=None, cache=None): last_height = blockIdx.height # filter out the orphan blocks, so we are left only with block indexes - # that have been confirmed (or are new enough that they haven't yet been confirmed) + # that have been confirmed + # (or are new enough that they haven't yet been confirmed) blockIndexes = list(filter(lambda block: block.hash not in orphans, blockIndexes)) if end is None: From 3b6659f40c17dd70dc6ef61464b16fe8b86df736 Mon Sep 17 00:00:00 2001 From: Brannon Dorsey Date: Thu, 15 Mar 2018 14:41:41 -0500 Subject: [PATCH 3/4] More PEP formatting. --- blockchain_parser/blockchain.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/blockchain_parser/blockchain.py b/blockchain_parser/blockchain.py index 131f006..2da67b4 100644 --- a/blockchain_parser/blockchain.py +++ b/blockchain_parser/blockchain.py @@ -99,7 +99,7 @@ def __getBlockIndexes(self, index): if self.indexPath != index: db = plyvel.DB(index, compression=None) self.blockIndexes = [DBBlockIndex(format_hash(k[1:]), v) - for k, v in db.iterator() if k[0] == ord('b')] + for k, v in db.iterator() if k[0] == ord('b')] db.close() self.blockIndexes.sort(key=lambda x: x.height) self.indexPath = index @@ -144,7 +144,7 @@ def _index_confirmed(self, chain_indexes, num_confirmations=6): # we are ready to make a decesion on whether or not the block # belongs to a fork or the main chain if len(chain) == num_confirmations: - if first_block.hash in chain: + if first_block.hash in chain: return True else: return False @@ -187,8 +187,8 @@ def get_ordered_blocks(self, index, start=0, end=None, cache=None): if blockIdx.height == last_height: # loop through future blocks until we find a chain 6 blocks - # long that includes this block. 
If we can't find one remove - # this block as it is invalid + # long that includes this block. If we can't find one + # remove this block as it is invalid if self._index_confirmed(blockIndexes[i:]): # if this block is confirmed, the unconfirmed block is @@ -202,7 +202,7 @@ def get_ordered_blocks(self, index, start=0, end=None, cache=None): last_height = blockIdx.height # filter out the orphan blocks, so we are left only with block indexes - # that have been confirmed + # that have been confirmed # (or are new enough that they haven't yet been confirmed) blockIndexes = list(filter(lambda block: block.hash not in orphans, blockIndexes)) From 622949c94c8c6a75c54b984af22ab2be126f1f8e Mon Sep 17 00:00:00 2001 From: Brannon Dorsey Date: Thu, 15 Mar 2018 14:42:41 -0500 Subject: [PATCH 4/4] More PEP formatting. --- blockchain_parser/blockchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockchain_parser/blockchain.py b/blockchain_parser/blockchain.py index 2da67b4..bb826ae 100644 --- a/blockchain_parser/blockchain.py +++ b/blockchain_parser/blockchain.py @@ -146,7 +146,7 @@ def _index_confirmed(self, chain_indexes, num_confirmations=6): if len(chain) == num_confirmations: if first_block.hash in chain: return True - else: + else: return False def get_ordered_blocks(self, index, start=0, end=None, cache=None):