Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ for block in blockchain.get_unordered_blocks():
for tx in block.transactions:
for no, output in enumerate(tx.outputs):
print("tx=%s outputno=%d type=%s value=%s" % (tx.hash, no, output.type, output.value))

# To get the blocks ordered by height, you need to provide the path of the
# `index` directory (the LevelDB block index maintained by bitcoind). It
# contains .ldb files and is located inside the `blocks` directory.
for block in blockchain.get_ordered_blocks(sys.argv[1] + '/index', end=1000):
print("height=%d block=%s" % (block.height, block.hash))
```

More examples are available in the examples directory.
Expand Down
4 changes: 2 additions & 2 deletions blockchain_parser/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,14 @@ class Block(object):
Represents a Bitcoin block, contains its header and its transactions.
"""

def __init__(self, raw_hex):
def __init__(self, raw_hex, height = None):
self.hex = raw_hex
self._hash = None
self._transactions = None
self._header = None
self._n_transactions = None
self.size = len(raw_hex)
self.height = None
self.height = height

def __repr__(self):
return "Block(%s)" % self.hash
Expand Down
42 changes: 42 additions & 0 deletions blockchain_parser/blockchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,12 @@
import os
import mmap
import struct
import stat
import leveldb

from .block import Block
from .index import DBBlockIndex
from .utils import format_hash


# Constant separating blocks in the .blk files
Expand All @@ -25,6 +29,8 @@ def get_files(path):
Given the path to the .bitcoin directory, returns the sorted list of .blk
files contained in that directory
"""
if not stat.S_ISDIR(os.stat(path)[stat.ST_MODE]):
return [path]
files = os.listdir(path)
files = [f for f in files if f.startswith("blk") and f.endswith(".dat")]
files = map(lambda x: os.path.join(path, x), files)
Expand Down Expand Up @@ -57,6 +63,13 @@ def get_blocks(blockfile):
offset += 1
raw_data.close()

def get_block(blockfile, offset):
    """Read one raw block from ``blockfile``.

    ``offset`` is the position of the block data itself. The block's
    length is stored as a little-endian unsigned 32-bit integer in the
    four bytes immediately before that position.

    Returns the bytes read for the block.
    """
    size_field_width = 4
    with open(blockfile, "rb") as handle:
        # Step back onto the length prefix that precedes the block data.
        handle.seek(offset - size_field_width)
        size_field = handle.read(size_field_width)
        block_size = struct.unpack("<I", size_field)[0]
        return handle.read(block_size)


class Blockchain(object):
"""Represent the blockchain contained in the series of .blk files
Expand All @@ -65,6 +78,8 @@ class Blockchain(object):

def __init__(self, path):
    """Remember where the .blk files live and start with an empty index cache."""
    # Passed to get_files() and used to build "blkNNNNN.dat" file paths.
    self.path = path
    # Cached list of block index records, filled by __getBlockIndexes().
    self.blockIndexes = None
    # Index path the cache was built from; None until the first load.
    self.indexPath = None

def get_unordered_blocks(self):
"""Yields the blocks contained in the .blk files as is,
Expand All @@ -73,3 +88,30 @@ def get_unordered_blocks(self):
for blk_file in get_files(self.path):
for raw_block in get_blocks(blk_file):
yield Block(raw_block)

def __getBlockIndexes(self, index):
    """Return the block index records found at ``index``, sorted by height.

    The result is cached per index path. leveldb offers no method to
    close the db (and release its lock), which creates problems during
    concurrent operations, so the database is only opened when a path
    different from the cached one is requested.
    """
    if self.indexPath == index:
        return self.blockIndexes

    db = leveldb.LevelDB(index)
    records = []
    for key, value in db.RangeIter():
        # Only keys starting with 'b' are decoded as block records; the
        # rest of the key is passed to format_hash as the block hash.
        if key[0] == ord('b'):
            records.append(DBBlockIndex(format_hash(key[1:]), value))
    self.blockIndexes = records
    self.blockIndexes.sort(key=lambda record: record.height)
    self.indexPath = index
    return self.blockIndexes

def get_ordered_blocks(self, index, start=0, end=None):
    """Yield the blocks from the .blk files ordered by height.

    The heights are extracted from the leveldb index located at path
    ``index``, which is maintained by bitcoind. ``start`` and ``end``
    slice the height-sorted list of index records; ``end=None`` means
    up to the last record.
    """
    ordered = self.__getBlockIndexes(index)
    stop = len(ordered) if end is None else end

    for entry in ordered[start:stop]:
        file_name = "blk%05d.dat" % entry.nFile
        raw_block = get_block(os.path.join(self.path, file_name),
                              entry.dataPos)
        yield Block(raw_block, entry.height)
58 changes: 58 additions & 0 deletions blockchain_parser/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from struct import unpack

from .utils import format_hash

BLOCK_HAVE_DATA = 8
BLOCK_HAVE_UNDO = 16

def readVarInt(raw_hex):
    """
    Decode one VarInt from the start of ``raw_hex``.

    This is the unusual VarInt format found in src/serialize.h of bitcoin
    core, which is used for the data stored in the leveldb. It is not the
    VARINT format described for general bitcoin serialization use.

    Every byte contributes its low seven bits. A set high bit means another
    byte follows, in which case one is added to the running value before
    the next byte is shifted in.

    Returns a ``(value, bytes_consumed)`` tuple.
    """
    value = 0
    consumed = 0
    more = True
    while more:
        byte = raw_hex[consumed]
        consumed += 1
        value = (value << 7) | (byte & 0x7f)
        more = bool(byte & 0x80)
        if more:
            value += 1
    return (value, consumed)

class DBBlockIndex(object):
    """One block record decoded from the leveldb block index.

    Attributes set by the constructor:
      hash        -- the block hash passed in by the caller
      height, status, n_tx -- VarInt fields read from the record
      nFile       -- file number, or -1 when the status has neither
                     BLOCK_HAVE_DATA nor BLOCK_HAVE_UNDO
      dataPos     -- block data position, or -1 without BLOCK_HAVE_DATA
      undoPos     -- undo data position, or -1 without BLOCK_HAVE_UNDO
      version, prevHash, merkelroot, nounce -- taken from the trailing
                     80 bytes of the record
    """

    def __init__(self, blk_hash, raw_hex):
        """Decode ``raw_hex``, the raw value stored for ``blk_hash``.

        The record is a run of VarInts (see readVarInt) followed by
        exactly 80 bytes that are unpacked with "<I32s32sIII".
        """
        self.hash = blk_hash
        pos = 0
        # Leading version field: read only to advance the cursor.
        _, i = readVarInt(raw_hex[pos:])
        pos += i
        self.height, i = readVarInt(raw_hex[pos:])
        pos += i
        self.status, i = readVarInt(raw_hex[pos:])
        pos += i
        self.n_tx, i = readVarInt(raw_hex[pos:])
        pos += i

        # Give the optional fields a -1 sentinel up front so every instance
        # has them. Previously the no-data branch assigned a local variable
        # instead of self.dataPos, which made __repr__ raise AttributeError.
        self.nFile = -1
        self.dataPos = -1
        self.undoPos = -1

        if self.status & (BLOCK_HAVE_DATA | BLOCK_HAVE_UNDO):
            self.nFile, i = readVarInt(raw_hex[pos:])
            pos += i
        if self.status & BLOCK_HAVE_DATA:
            self.dataPos, i = readVarInt(raw_hex[pos:])
            pos += i
        if self.status & BLOCK_HAVE_UNDO:
            self.undoPos, i = readVarInt(raw_hex[pos:])
            pos += i

        # Everything after the VarInts must be the 80 trailing bytes.
        assert pos + 80 == len(raw_hex), \
            "unexpected block index record length"
        (self.version, prev_hash, merkle_root,
         _time, _bits, self.nounce) = unpack("<I32s32sIII", raw_hex[-80:])
        self.prevHash = format_hash(prev_hash)
        self.merkelroot = format_hash(merkle_root)

    def __repr__(self):
        return "DBBlockIndex(%s, height=%d, file_no=%d, file_pos=%d)" % (
            self.hash, self.height, self.nFile, self.dataPos)
29 changes: 29 additions & 0 deletions blockchain_parser/tests/test_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import unittest
from binascii import a2b_hex
from datetime import datetime

from blockchain_parser.index import DBBlockIndex

class TestDBIndex(unittest.TestCase):
    """Checks DBBlockIndex decoding against one fixed index record."""

    def test_from_hex(self):
        """Decode a known record and compare every parsed attribute."""
        key_str = "0000000000000000169cdec8dcfa2e408f59e0d50b1a228f65d8f5480f" \
                  "990000"
        value_str = "88927193a7021d8160804aaa89fc0185b6e81e02000000fb759231e1" \
                    "fa5f80c3508e3a59ebf301930257d04aa492070000000000000000c1" \
                    "1c6bc67af8264be7979db45043f5f5c1e8d2060082af4ce7957658a2" \
                    "2147e30bf97f54747b1b187d1eac41"

        entry = DBBlockIndex(key_str, a2b_hex(value_str))

        # Checked in this order; the first mismatch fails the test.
        expected = (
            ("hash", "0000000000000000169cdec8dcfa2e408f59e0d50b1a228f65d8f5480f990000"),
            ("height", 332802),
            ("status", 29),
            ("n_tx", 352),
            ("nFile", 202),
            ("dataPos", 90357377),
            ("undoPos", 13497502),
            ("version", 2),
            ("nounce", 1101799037),
            ("prevHash", "00000000000000000792a44ad057029301f3eb593a8e50c3805ffae1319275fb"),
            ("merkelroot", "e34721a2587695e74caf820006d2e8c1f5f54350b49d97e74b26f87ac66b1cc1"),
        )
        for attribute, value in expected:
            self.assertEqual(getattr(entry, attribute), value)
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@
],
install_requires=[
'python-bitcoinlib==0.5.0',
'leveldb==0.194',
]
)