Skip to content

Commit 88ba77a

Browse files
committed
archive: optimize Azure download
The read()s of the Azure blob storage classes seem to be synchronous. Wrap them into a big buffer for optimal download speed.
1 parent 889fabc commit 88ba77a

File tree

1 file changed

+22
-0
lines changed

1 file changed

+22
-0
lines changed

pym/bob/archive.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import gzip
3636
import hashlib
3737
import http.client
38+
import io
3839
import os
3940
import os.path
4041
import signal
@@ -972,6 +973,25 @@ def __exit__(self, exc_type, exc_value, traceback):
972973
return False
973974

974975

976+
class AzureStreamReadAdapter(io.RawIOBase):
    """Present a download stream as a read-only, non-seekable raw I/O object.

    The wrapped object only needs a ``read(size)`` method that returns
    bytes. Deriving from ``io.RawIOBase`` makes the stream acceptable to
    ``io.BufferedReader``, so that many small reads can be served from one
    large buffered read of the underlying stream.
    """

    def __init__(self, raw):
        """Wrap *raw*, an object providing ``read(size=-1) -> bytes``."""
        super().__init__()
        self.raw = raw

    def readable(self):
        """Return True: the adapter supports reading."""
        return True

    def seekable(self):
        """Return False: the adapter offers no random access."""
        return False

    def writable(self):
        """Return False: the adapter is read-only."""
        return False

    def read(self, size=-1):
        """Read up to *size* bytes; ``-1`` or ``None`` reads to the end."""
        # The io convention treats None like -1. Normalize it so the
        # wrapped stream only ever sees an integer size.
        if size is None:
            size = -1
        return self.raw.read(size)

    def readall(self):
        """Read and return everything that is left in the stream."""
        return self.raw.read()

    def readinto(self, buf):
        """Fill *buf* with up to ``len(buf)`` bytes and return the count.

        A return value of 0 signals the end of the stream.
        """
        data = self.raw.read(len(buf))
        count = len(data)
        # Only the leading part of the buffer is overwritten when the
        # stream delivered fewer bytes than requested.
        buf[:count] = data
        return count
994+
975995
class AzureArchive(BaseArchive):
976996
def __init__(self, spec):
977997
super().__init__(spec)
@@ -1002,6 +1022,8 @@ def _openDownloadFile(self, buildId, suffix):
10021022
from azure.core.exceptions import AzureError, ResourceNotFoundError
10031023
try:
10041024
stream = client.download_blob(self.__makeBlobName(buildId, suffix))
1025+
stream = AzureStreamReadAdapter(stream) # Make io.RawIOBase compatible
1026+
stream = io.BufferedReader(stream, 1048576) # 1MiB buffer. Azure read()s are synchronous.
10051027
ret = AzureDownloader(client, stream)
10061028
client = None
10071029
return ret

0 commit comments

Comments
 (0)