Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 26 additions & 16 deletions dissect/ntfs/attr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from dissect.ntfs.c_ntfs import (
ATTRIBUTE_TYPE_CODE,
IO_REPARSE_TAG,
WOF_COMPRESSION_FORMAT,
c_ntfs,
segment_reference,
varint,
Expand Down Expand Up @@ -501,6 +502,8 @@ def __init__(self, fh: BinaryIO, record: MftRecord | None = None):
self.tag_header = c_ntfs._SYMBOLIC_LINK_REPARSE_BUFFER(data)
elif self.tag == IO_REPARSE_TAG.MOUNT_POINT:
self.tag_header = c_ntfs._MOUNT_POINT_REPARSE_BUFFER(data)
elif self.tag == IO_REPARSE_TAG.WOF:
self.tag_header = c_ntfs._COMPRESS_REPARSE_BUFFER(data)

self.buffer = data.read()

Expand All @@ -513,35 +516,42 @@ def tag(self) -> IO_REPARSE_TAG:

@property
def substitute_name(self) -> str | None:
if not self.tag_header:
return None
if self.tag in (IO_REPARSE_TAG.SYMLINK, IO_REPARSE_TAG.MOUNT_POINT):
offset = self.tag_header.SubstituteNameOffset
length = self.tag_header.SubstituteNameLength
return self.buffer[offset : offset + length].decode("utf-16-le")

offset = self.tag_header.SubstituteNameOffset
length = self.tag_header.SubstituteNameLength
return self.buffer[offset : offset + length].decode("utf-16-le")
return None

@property
def print_name(self) -> str | None:
if not self.tag_header:
return None
if self.tag in (IO_REPARSE_TAG.SYMLINK, IO_REPARSE_TAG.MOUNT_POINT):
offset = self.tag_header.PrintNameOffset
length = self.tag_header.PrintNameLength
return self.buffer[offset : offset + length].decode("utf-16-le")

offset = self.tag_header.PrintNameOffset
length = self.tag_header.PrintNameLength
return self.buffer[offset : offset + length].decode("utf-16-le")
return None

@property
def absolute(self) -> bool:
if self.tag != IO_REPARSE_TAG.SYMLINK:
return True
if self.tag == IO_REPARSE_TAG.SYMLINK:
return self.tag_header.Flags == c_ntfs.SYMLINK_FLAG.ABSOLUTE

return self.tag_header.Flags == c_ntfs.SYMLINK_FLAG.ABSOLUTE
return True

@property
def relative(self) -> bool:
if self.tag != IO_REPARSE_TAG.SYMLINK:
return False
if self.tag == IO_REPARSE_TAG.SYMLINK:
return self.tag_header.Flags == c_ntfs.SYMLINK_FLAG.RELATIVE

return False

@property
def wof_compression_format(self) -> int:
if self.tag == IO_REPARSE_TAG.WOF:
return self.tag_header.CompressionFormat

return self.tag_header.Flags == c_ntfs.SYMLINK_FLAG.RELATIVE
return WOF_COMPRESSION_FORMAT.NO_COMPRESSION


ATTRIBUTE_CLASS_MAP = {
Expand Down
28 changes: 28 additions & 0 deletions dissect/ntfs/c_ntfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import struct

from dissect.cstruct import cstruct
from dissect.util.compression import lznt1, lzxpress_huffman

ntfs_def = """
/* ================ Generic stuff ================ */
Expand Down Expand Up @@ -276,6 +277,22 @@
USHORT PrintNameLength;
} _MOUNT_POINT_REPARSE_BUFFER;

enum WOF_COMPRESSION_FORMAT : LONG {
NO_COMPRESSION = -2,
LZNT1 = -1,
XPRESS4K = 0,
LZX32K = 1,
XPRESS8K = 2,
XPRESS16K = 3,
};

typedef struct _COMPRESS_REPARSE_BUFFER {
ULONG WofVersion; // 1 - WIM backing provider ("WIMBoot"), 2 - System compressed file provider
ULONG WofProvider;
ULONG ProviderVer; // WOF_FILE_PROVIDER_CURRENT_VERSION == 1
WOF_COMPRESSION_FORMAT CompressionFormat; // WOF_COMPRESSION_FORMAT
} COMPRESS_REPARSE_BUFFER;

/* ================ Index ================ */

enum COLLATION : ULONG {
Expand Down Expand Up @@ -563,6 +580,17 @@
ACE_TYPE = c_ntfs.ACE_TYPE
ACE_OBJECT_FLAGS = c_ntfs.ACE_OBJECT_FLAGS
COLLATION = c_ntfs.COLLATION
WOF_COMPRESSION_FORMAT = c_ntfs.WOF_COMPRESSION_FORMAT

# Maps every WOF compression format to a ``(decompressor, chunk_size)`` tuple.
# ``None`` in either position means that value is not available for the format.
WOF_DECOMPRESSOR_MAP = {
    WOF_COMPRESSION_FORMAT.NO_COMPRESSION: (None, None),
    WOF_COMPRESSION_FORMAT.LZNT1: (lznt1.decompress, None),
    WOF_COMPRESSION_FORMAT.XPRESS4K: (lzxpress_huffman.decompress, 4 * 1024),
    WOF_COMPRESSION_FORMAT.XPRESS8K: (lzxpress_huffman.decompress, 8 * 1024),
    WOF_COMPRESSION_FORMAT.XPRESS16K: (lzxpress_huffman.decompress, 16 * 1024),
    # There is no LZX decompressor yet, see https://github.com/fox-it/dissect.util/issues/74
    WOF_COMPRESSION_FORMAT.LZX32K: (None, 32 * 1024),
}

# Some useful magic numbers and constants
NTFS_SIGNATURE = b"NTFS "
Expand Down
31 changes: 31 additions & 0 deletions dissect/ntfs/mft.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
FILE_NUMBER_MFT,
FILE_NUMBER_ROOT,
IO_REPARSE_TAG,
WOF_COMPRESSION_FORMAT,
WOF_DECOMPRESSOR_MAP,
c_ntfs,
segment_reference,
)
Expand All @@ -27,6 +29,7 @@
NotAReparsePointError,
)
from dissect.ntfs.index import Index, IndexEntry
from dissect.ntfs.stream import WofCompressedStream
from dissect.ntfs.util import AttributeCollection, AttributeMap, apply_fixup

if TYPE_CHECKING:
Expand Down Expand Up @@ -331,6 +334,11 @@ def is_mount_point(self) -> bool:
attr = self.attributes[ATTRIBUTE_TYPE_CODE.REPARSE_POINT]
return bool(attr) and attr.tag == IO_REPARSE_TAG.MOUNT_POINT

def is_wof_compressed(self) -> bool:
    """Return whether this record carries a reparse point tagged as WOF (Windows Overlay Filter)."""
    reparse_point = self.attributes[ATTRIBUTE_TYPE_CODE.REPARSE_POINT]
    if not reparse_point:
        # No reparse point attribute at all, so this cannot be a WOF compressed file.
        return False

    return reparse_point.tag == IO_REPARSE_TAG.WOF

@cached_property
def reparse_point_name(self) -> str:
"""Return the (printable) name of this reparse point."""
Expand Down Expand Up @@ -383,6 +391,23 @@ def _get_stream_attributes(
raise FileNotFoundError(f"No such stream on record {self}: ({name!r}, {attr_type})")
return attrs

def _open_wof(
    self,
    name: str = "WofCompressedData",
    attr_type: ATTRIBUTE_TYPE_CODE = ATTRIBUTE_TYPE_CODE.DATA,
    allocated: bool = False,
) -> BinaryIO:
    """Open the WOF backing stream of this record, wrapped in a decompressing stream when needed.

    Args:
        name: The name of the stream that holds the WOF compressed data.
        attr_type: The attribute type of that stream.
        allocated: Passed through to the ``open()`` call of the stream attributes.

    Returns:
        The stream itself when the reparse point reports ``NO_COMPRESSION``, otherwise a
        :class:`WofCompressedStream` that decompresses it on the fly.

    Raises:
        FileNotFoundError: If there are no attributes with the given name and type.
        NotImplementedError: If the compression format has no usable decompressor or chunk size
            (LZX32K, LZNT1, or a format missing from ``WOF_DECOMPRESSOR_MAP``).
    """
    fh = self._get_stream_attributes(name, attr_type).open(allocated)
    compression_format = self.attributes.REPARSE_POINT.wof_compression_format

    # Compare by value; identity would depend on how the enum member was produced.
    if compression_format == WOF_COMPRESSION_FORMAT.NO_COMPRESSION:
        return fh

    # Default to "nothing available" so a format missing from the map ends up in the same
    # NotImplementedError as the known-but-unsupported ones, instead of failing to unpack None.
    decompressor, chunk_size = WOF_DECOMPRESSOR_MAP.get(compression_format, (None, None))
    if decompressor is None or chunk_size is None:
        raise NotImplementedError(f"Compression format not supported for decompression: {compression_format}")

    return WofCompressedStream(fh, 0, fh.size, self.size(), decompressor, chunk_size)

def open(
self,
name: str = "",
Expand All @@ -399,6 +424,12 @@ def open(
Raises:
FileNotFoundError: If there are no attributes with the given name and type.
"""

# If we explicitly ask for the WofCompressedData stream, we give it back as is.
# This way, users can still access the raw (still compressed) stream.
if name != "WofCompressedData" and (self.is_reparse_point() and self.is_wof_compressed()):
return self._open_wof(attr_type=attr_type, allocated=allocated)

return self._get_stream_attributes(name, attr_type).open(allocated)

def size(
Expand Down
60 changes: 59 additions & 1 deletion dissect/ntfs/stream.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from __future__ import annotations

import io
import struct
from typing import BinaryIO

from dissect.util import lznt1
from dissect.util.stream import RunlistStream
from dissect.util.stream import CompressedStream, RunlistStream

DEFAULT_CHUNK_SIZE = 4 * 1024


class CompressedRunlistStream(RunlistStream):
Expand Down Expand Up @@ -112,3 +115,58 @@ def _read(self, offset: int, length: int) -> bytes:
cu_block += 1

return b"".join(read_list)


class WofCompressedStream(CompressedStream):
    """Compressed stream for Windows Overlay Filter (WOF) files.

    Handles the decompression of WOF compressed files using the specified decompressor.

    WOF compressed files start with a chunk table, which is a list of offsets to the start of each chunk in the
    compressed data. The chunk table is followed by the compressed data itself. The offset of the first chunk is
    not stored in the table; it is always 0, relative to the end of the table.

    Supported decompression methods are currently:
        * LZXPRESS4K Huffman (default)
        * LZXPRESS8K Huffman
        * LZXPRESS16K Huffman

    Note that LZX (32K) and LZNT1 decompression are not yet supported.

    Args:
        fh: A file-like object for the compressed data.
        offset: The offset to the start of the chunk table.
        size: The size of the compressed data.
        original_size: The original size of the uncompressed data.
        decompress: The decompressor function to use.
        chunk_size: The size of the chunks to read from the compressed data. (default: 4 KiB)
    """

    def __init__(
        self,
        fh: BinaryIO,
        offset: int,
        size: int,
        original_size: int,
        decompress: callable,
        chunk_size: int = DEFAULT_CHUNK_SIZE,
    ):
        fh.seek(offset)

        # One table entry per chunk, except for the first chunk. Clamp to zero so that an empty file
        # (original_size == 0) yields an empty table instead of an invalid negative struct repeat count.
        num_entries = max((original_size + chunk_size - 1) // chunk_size - 1, 0)

        # Table entries are 64-bit when the uncompressed size does not fit in 32 bits.
        entry_format = "Q" if original_size > 0xFFFFFFFF else "I"
        table = struct.Struct(f"<{num_entries}{entry_format}")
        chunks = (0, *table.unpack(fh.read(table.size)))

        # The compressed data starts right after the chunk table, which is where fh is positioned now.
        super().__init__(fh, fh.tell(), size, original_size, decompress, chunk_size, chunks)

    def _read_chunk(self, offset: int, size: int) -> bytes:
        """Read ``size`` bytes of the chunk at ``offset`` and return its uncompressed content."""
        self.fh.seek(self.offset + offset)
        buf = self.fh.read(size)

        # Every chunk decompresses to chunk_size bytes, except the last one which holds the remainder.
        # https://github.com/ebiggers/ntfs-3g-system-compression/blob/53f7bcba9c1c54d8e15e87649ed66654474885ab/src/system_compression.c#L556
        if offset == self.chunks[-1]:
            uncompressed_size = (self.original_size - 1) % self.chunk_size + 1
        else:
            uncompressed_size = self.chunk_size

        # A chunk that is stored at exactly its uncompressed size was not compressed at all.
        # https://github.com/ebiggers/ntfs-3g-system-compression/blob/53f7bcba9c1c54d8e15e87649ed66654474885ab/src/system_compression.c#L570
        return buf if len(buf) == uncompressed_size else self.decompressor(buf)
Binary file added tests/data/wof/test16k.txt:WofCompressedData.gz
Binary file not shown.
Binary file added tests/data/wof/test4k.txt:WofCompressedData.gz
Binary file not shown.
Binary file added tests/data/wof/test8k.txt:WofCompressedData.gz
Binary file not shown.
Binary file added tests/data/wof/testlzx.txt:WofCompressedData.gz
Binary file not shown.
44 changes: 43 additions & 1 deletion tests/test_attr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest

from dissect.ntfs.attr import Attribute, FileName, StandardInformation
from dissect.ntfs.c_ntfs import ATTRIBUTE_TYPE_CODE, IO_REPARSE_TAG
from dissect.ntfs.c_ntfs import ATTRIBUTE_TYPE_CODE, IO_REPARSE_TAG, WOF_COMPRESSION_FORMAT
from dissect.ntfs.exceptions import VolumeNotAvailableError


Expand Down Expand Up @@ -110,3 +110,45 @@ def test_reparse_point_symlink() -> None:
assert attr.print_name == "Target"
assert not attr.absolute
assert attr.relative


@pytest.mark.parametrize(
    ("attribute", "compression_format"),
    [
        # CompressionFormat == 0
        (
            "c00000003000000000000000000007001800000018000000170000801000000001000000020000000100000000000000ffffffff8279471100000000000000000000000000000000",
            WOF_COMPRESSION_FORMAT.XPRESS4K,
        ),
        # CompressionFormat == 2
        (
            "c0000000300000000000000000000a001800000018000000170000801000000001000000020000000100000002000000ffffffff8279471100000000000000000000000000000000",
            WOF_COMPRESSION_FORMAT.XPRESS8K,
        ),
        # CompressionFormat == 3
        (
            "c00000003000000000000000000007001800000018000000170000801000000001000000020000000100000003000000ffffffff82794711ffffffff827947110000000000000000",
            WOF_COMPRESSION_FORMAT.XPRESS16K,
        ),
        # CompressionFormat == 1
        (
            "c0000000300000000000000000000a001800000018000000170000801000000001000000020000000100000001000000ffffffff82794711ffffffff827947110000000000000000",
            WOF_COMPRESSION_FORMAT.LZX32K,
        ),
        # CompressionFormat == -1
        (
            "c0000000300000000000000000000a0018000000180000001700008010000000010000000200000001000000ffffffffffffffff82794711ffffffff827947110000000000000000",
            WOF_COMPRESSION_FORMAT.LZNT1,
        ),
        # CompressionFormat == -2
        (
            "c0000000300000000000000000000a0018000000180000001700008010000000010000000200000001000000feffffffffffffff82794711ffffffff827947110000000000000000",
            WOF_COMPRESSION_FORMAT.NO_COMPRESSION,
        ),
    ],
)
def test_reparse_point_wof(attribute: str, compression_format: int) -> None:
    """Parse a WOF reparse point attribute and check the reported compression format."""
    raw = bytes.fromhex(attribute)
    attr = Attribute.from_bytes(raw)

    # The attribute must be recognised as a WOF reparse point.
    assert attr.type == ATTRIBUTE_TYPE_CODE.REPARSE_POINT
    assert attr.tag == IO_REPARSE_TAG.WOF

    # The symlink / mount point specific properties fall back to their defaults.
    assert attr.absolute
    assert not attr.relative
    assert attr.substitute_name is None
    assert attr.print_name is None

    assert attr.wof_compression_format == compression_format
Loading
Loading