Skip to content

Commit a7220dd

Browse files
committed
Merge pull request #20 from Changaco/links
Better support of links
2 parents cb49977 + bd232d3 commit a7220dd

19 files changed

+968
-9
lines changed

libarchive/entry.py

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,30 +61,57 @@ def isfifo(self):
6161

6262
@property
def islnk(self):
    """Tell whether this entry is a hard link.

    The entry counts as a hard link when libarchive reports a non-empty
    hardlink target. The wide-character accessor is tried first and the
    byte-string accessor is only consulted when the former yields nothing.
    """
    entry_p = self._entry_p
    target = ffi.entry_hardlink_w(entry_p) or ffi.entry_hardlink(entry_p)
    return bool(target)
66+
67+
@property
def issym(self):
    """Tell whether this entry is a symbolic link.

    Masks `filetype` with 0o170000 and compares the result with 0o120000.
    """
    type_bits = self.filetype & 0o170000
    return type_bits == 0o120000
6570

71+
def _linkpath(self):
    """Return the link target of this entry.

    Accessors are tried in this order: wide-character symlink,
    wide-character hardlink, byte-string symlink, byte-string hardlink.
    The first truthy result is returned; when none is truthy, whatever the
    byte-string hardlink accessor produced is returned.
    """
    entry_p = self._entry_p
    preferred = (
        ffi.entry_symlink_w,
        ffi.entry_hardlink_w,
        ffi.entry_symlink,
    )
    for accessor in preferred:
        target = accessor(entry_p)
        if target:
            return target
    return ffi.entry_hardlink(entry_p)

# aliases to get the same api as tarfile
linkpath = property(_linkpath)
linkname = property(_linkpath)
80+
6681
@property
def isreg(self):
    """Tell whether this entry is a regular file.

    Masks `filetype` with 0o170000 and compares the result with 0o100000.
    """
    type_bits = self.filetype & 0o170000
    return type_bits == 0o100000
6984

85+
@property
def isfile(self):
    """Alias of `isreg`: returns exactly what `isreg` returns."""
    regular = self.isreg
    return regular
88+
7089
@property
def issock(self):
    """Tell whether this entry is a socket.

    Masks `filetype` with 0o170000 and compares the result with 0o140000.
    """
    type_bits = self.filetype & 0o170000
    return type_bits == 0o140000
7392

93+
@property
def isdev(self):
    """Tell whether this entry is a device-like special file.

    Truthy when any of `ischr`, `isblk`, `isfifo` or `issock` is truthy;
    the checks short-circuit in that order.
    """
    return (
        self.ischr
        or self.isblk
        or self.isfifo
        or self.issock
    )
96+
7497
@property
def mtime(self):
    """Return the value libarchive's `entry_mtime` reports for this entry."""
    entry_p = self._entry_p
    return ffi.entry_mtime(entry_p)
77100

78-
@property
79-
def pathname(self):
80-
return ffi.entry_pathname_w(self._entry_p)
101+
def _getpathname(self):
    """Return the entry's path name.

    The wide-character accessor is preferred; the byte-string accessor is
    used only when the wide-character one returns a falsy value.
    """
    entry_p = self._entry_p
    wide_path = ffi.entry_pathname_w(entry_p)
    if wide_path:
        return wide_path
    return ffi.entry_pathname(entry_p)
81104

82-
@pathname.setter
83-
def pathname(self, value):
105+
def _setpathname(self, value):
    """Set the entry's path name.

    `value` is passed through unchanged when it is already `bytes`;
    anything else is encoded to UTF-8 first.
    """
    encoded = value if isinstance(value, bytes) else value.encode('utf8')
    ffi.entry_update_pathname_utf8(self._entry_p, c_char_p(encoded))

pathname = property(_getpathname, _setpathname)
# aliases to get the same api as tarfile
path = property(_getpathname, _setpathname)
name = property(_getpathname, _setpathname)
114+
88115
@property
89116
def size(self):
90117
if ffi.entry_size_is_set(self._entry_p):
@@ -96,4 +123,6 @@ def mode(self):
96123

97124
@property
def strmode(self):
    """Return the mode string reported by libarchive, stripped of
    surrounding whitespace."""
    # note we strip the mode because archive_entry_strmode
    # returns a trailing space: strcpy(bp, "?rwxrwxrwx ");
    raw_mode = ffi.entry_strmode(self._entry_p)
    return raw_mode.strip()

libarchive/ffi.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,12 +108,17 @@ def ffi(name, argtypes, restype, errcheck=None):
108108

109109
# Declarations of entry accessors: each call names the function, its
# argument types and its return type.
ffi('entry_filetype', [c_archive_entry_p], c_int)
# NOTE(review): libarchive documents archive_entry_mtime as returning
# time_t; c_int may truncate where time_t is wider than int -- confirm.
ffi('entry_mtime', [c_archive_entry_p], c_int)
# Byte-string (c_char_p) and wide-character (c_wchar_p) variants are both
# declared so callers can fall back from one to the other.
ffi('entry_pathname', [c_archive_entry_p], c_char_p)
ffi('entry_pathname_w', [c_archive_entry_p], c_wchar_p)
ffi('entry_sourcepath', [c_archive_entry_p], c_char_p)
ffi('entry_size', [c_archive_entry_p], c_longlong)
ffi('entry_size_is_set', [c_archive_entry_p], c_int)
ffi('entry_mode', [c_archive_entry_p], c_int)
ffi('entry_strmode', [c_archive_entry_p], c_char_p)
ffi('entry_hardlink', [c_archive_entry_p], c_char_p)
ffi('entry_hardlink_w', [c_archive_entry_p], c_wchar_p)
ffi('entry_symlink', [c_archive_entry_p], c_char_p)
ffi('entry_symlink_w', [c_archive_entry_p], c_wchar_p)

# Setter: takes the entry pointer and a byte string, returns nothing.
ffi('entry_update_pathname_utf8', [c_archive_entry_p, c_char_p], None)
119124

tests/__init__.py

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,20 @@
55

66
from __future__ import division, print_function, unicode_literals
77

8-
from contextlib import contextmanager
8+
from contextlib import closing, contextmanager
99
from copy import copy
1010
from os import chdir, getcwd, stat, walk
11-
from os.path import abspath, join
11+
from os.path import abspath, dirname, join
1212
from stat import S_ISREG
13+
import tarfile
14+
15+
from libarchive import file_reader
16+
17+
from . import surrogateescape
18+
19+
20+
data_dir = join(dirname(__file__), 'data')
21+
surrogateescape.register()
1322

1423

1524
def check_archive(archive, tree):
@@ -33,6 +42,68 @@ def check_archive(archive, tree):
3342
assert len(tree2) == 0
3443

3544

45+
def get_entries(location):
    """
    Using the archive file at `location`, return an iterable of name->value
    mappings for each libarchive.ArchiveEntry objects essential attributes.
    Byte-string paths are passed through `surrogate_decode`, i.e. decoded
    as UTF-8 with the surrogateescape error handler, so bytes that are not
    valid UTF-8 are kept as lone surrogates instead of raising.
    """
    with file_reader(location) as archive:
        for entry in archive:
            # libarchive introduces prefixes such as h prefix for
            # hardlinks: tarfile does not, so we ignore the first char
            permissions = entry.strmode[1:].decode('ascii')
            record = {
                'path': surrogate_decode(entry.pathname),
                'mtime': entry.mtime,
                'size': entry.size,
                'mode': permissions,
                'isreg': entry.isreg,
                'isdir': entry.isdir,
                'islnk': entry.islnk,
                'issym': entry.issym,
                'linkpath': surrogate_decode(entry.linkpath),
                'isblk': entry.isblk,
                'ischr': entry.ischr,
                'isfifo': entry.isfifo,
                'isdev': entry.isdev,
            }
            yield record
72+
73+
74+
def get_tarinfos(location):
    """
    Using the tar archive file at `location`, return an iterable of
    name->value mappings for each tarfile.TarInfo objects essential
    attributes.
    Byte-string paths are passed through `surrogate_decode`, i.e. decoded
    as UTF-8 with the surrogateescape error handler, so bytes that are not
    valid UTF-8 are kept as lone surrogates instead of raising.
    Directory paths always end with a '/'.
    """
    # tarfile.filemode was deprecated in Python 3.3 and removed in 3.8;
    # stat.filemode is its replacement but does not exist on Python 2.
    # The module is imported under an alias because this file binds the
    # name `stat` to os.stat.
    import stat as stat_module
    filemode = getattr(stat_module, 'filemode', None) or tarfile.filemode

    with closing(tarfile.open(location)) as tar:
        for entry in tar:
            path = surrogate_decode(entry.path or '')
            if entry.isdir() and not path.endswith('/'):
                path += '/'
            # libarchive introduces prefixes such as h prefix for
            # hardlinks: tarfile does not, so we ignore the first char
            mode = filemode(entry.mode)[1:]
            yield {
                'path': path,
                'mtime': entry.mtime,
                'size': entry.size,
                'mode': mode,
                'isreg': entry.isreg(),
                'isdir': entry.isdir(),
                'islnk': entry.islnk(),
                'issym': entry.issym(),
                'linkpath': surrogate_decode(entry.linkpath or None),
                'isblk': entry.isblk(),
                'ischr': entry.ischr(),
                'isfifo': entry.isfifo(),
                'isdev': entry.isdev(),
            }
105+
106+
36107
@contextmanager
37108
def in_dir(dirpath):
38109
prev = abspath(getcwd())
@@ -59,3 +130,9 @@ def treestat(d):
59130
fpath = join(dirpath, fname)
60131
r[fpath] = stat_dict(fpath)
61132
return r
133+
134+
135+
def surrogate_decode(o):
    """Decode `o` as UTF-8 with the surrogateescape error handler when it
    is a `bytes` object; return any other value unchanged."""
    if not isinstance(o, bytes):
        return o
    return o.decode('utf8', errors='surrogateescape')

tests/data/special.tar

110 KB
Binary file not shown.

tests/data/tar_relative.tar

10 KB
Binary file not shown.

tests/data/testtar.README

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
This test file is borrowed from the Python codebase and test suite.
2+
This is a tricky tar archive with several weird and malformed entries:
3+
https://hg.python.org/cpython/file/bff88c866886/Lib/test/testtar.tar

tests/data/testtar.tar

425 KB
Binary file not shown.

0 commit comments

Comments
 (0)