fox-it · Horofic · Apr 9, 2025 · Apr 16, 2025
diff --git a/dissect/ntfs/attr.py b/dissect/ntfs/attr.py
@@ -9,6 +9,7 @@
 from dissect.ntfs.c_ntfs import (
     ATTRIBUTE_TYPE_CODE,
     IO_REPARSE_TAG,
+    WOF_COMPRESSION_FORMAT,
     c_ntfs,
     segment_reference,
     varint,
@@ -501,6 +502,8 @@ def __init__(self, fh: BinaryIO, record: MftRecord | None = None):
             self.tag_header = c_ntfs._SYMBOLIC_LINK_REPARSE_BUFFER(data)
         elif self.tag == IO_REPARSE_TAG.MOUNT_POINT:
             self.tag_header = c_ntfs._MOUNT_POINT_REPARSE_BUFFER(data)
+        elif self.tag == IO_REPARSE_TAG.WOF:
+            self.tag_header = c_ntfs._COMPRESS_REPARSE_BUFFER(data)
 
         self.buffer = data.read()
 
@@ -513,35 +516,42 @@ def tag(self) -> IO_REPARSE_TAG:
 
     @property
     def substitute_name(self) -> str | None:
-        if not self.tag_header:
-            return None
+        if self.tag in (IO_REPARSE_TAG.SYMLINK, IO_REPARSE_TAG.MOUNT_POINT):
+            offset = self.tag_header.SubstituteNameOffset
+            length = self.tag_header.SubstituteNameLength
+            return self.buffer[offset : offset + length].decode("utf-16-le")
 
-        offset = self.tag_header.SubstituteNameOffset
-        length = self.tag_header.SubstituteNameLength
-        return self.buffer[offset : offset + length].decode("utf-16-le")
+        return None
 
     @property
     def print_name(self) -> str | None:
-        if not self.tag_header:
-            return None
+        if self.tag in (IO_REPARSE_TAG.SYMLINK, IO_REPARSE_TAG.MOUNT_POINT):
+            offset = self.tag_header.PrintNameOffset
+            length = self.tag_header.PrintNameLength
+            return self.buffer[offset : offset + length].decode("utf-16-le")
 
-        offset = self.tag_header.PrintNameOffset
-        length = self.tag_header.PrintNameLength
-        return self.buffer[offset : offset + length].decode("utf-16-le")
+        return None
 
     @property
     def absolute(self) -> bool:
-        if self.tag != IO_REPARSE_TAG.SYMLINK:
-            return True
+        if self.tag == IO_REPARSE_TAG.SYMLINK:
+            return self.tag_header.Flags == c_ntfs.SYMLINK_FLAG.ABSOLUTE
 
-        return self.tag_header.Flags == c_ntfs.SYMLINK_FLAG.ABSOLUTE
+        return True
 
     @property
     def relative(self) -> bool:
-        if self.tag != IO_REPARSE_TAG.SYMLINK:
-            return False
+        if self.tag == IO_REPARSE_TAG.SYMLINK:
+            return self.tag_header.Flags == c_ntfs.SYMLINK_FLAG.RELATIVE
+
+        return False
+
+    @property
+    def wof_compression_format(self) -> int:
+        if self.tag == IO_REPARSE_TAG.WOF:
+            return self.tag_header.CompressionFormat
 
-        return self.tag_header.Flags == c_ntfs.SYMLINK_FLAG.RELATIVE
+        return WOF_COMPRESSION_FORMAT.NO_COMPRESSION
 
 
 ATTRIBUTE_CLASS_MAP = {

diff --git a/dissect/ntfs/c_ntfs.py b/dissect/ntfs/c_ntfs.py
@@ -3,6 +3,7 @@
 import struct
 
 from dissect.cstruct import cstruct
+from dissect.util.compression import lznt1, lzxpress_huffman
 
 ntfs_def = """
 /* ================ Generic stuff ================ */
@@ -276,6 +277,22 @@
     USHORT  PrintNameLength;
 } _MOUNT_POINT_REPARSE_BUFFER;
 
+enum WOF_COMPRESSION_FORMAT : LONG {
+    NO_COMPRESSION = -2,
+    LZNT1 = -1,
+    XPRESS4K = 0,
+    LZX32K = 1,
+    XPRESS8K = 2,
+    XPRESS16K = 3,
+};
+
+typedef struct _COMPRESS_REPARSE_BUFFER {
+    ULONG WofVersion; // WOF_CURRENT_VERSION == 1
+    ULONG WofProvider; // 1 - WIM backing provider ("WIMBoot"), 2 - System compressed file provider
+    ULONG ProviderVer; // WOF_FILE_PROVIDER_CURRENT_VERSION == 1
+    WOF_COMPRESSION_FORMAT CompressionFormat; // WOF_COMPRESSION_FORMAT
+} COMPRESS_REPARSE_BUFFER;
+
 /* ================ Index ================ */
 
 enum COLLATION : ULONG {
@@ -563,6 +580,17 @@
 ACE_TYPE = c_ntfs.ACE_TYPE
 ACE_OBJECT_FLAGS = c_ntfs.ACE_OBJECT_FLAGS
 COLLATION = c_ntfs.COLLATION
+WOF_COMPRESSION_FORMAT = c_ntfs.WOF_COMPRESSION_FORMAT
+
+# Lookup table from WOF compression format to a ``(decompress function, chunk size)`` tuple.
+# Consumers unpack the tuple into the decompressor and the chunk size handed to the WOF stream wrapper.
+# NOTE(review): a ``None`` in either position presumably means "not applicable / not available" - confirm.
+WOF_DECOMPRESSOR_MAP = {
+    WOF_COMPRESSION_FORMAT.NO_COMPRESSION: (None, None),
+    # NOTE(review): a decompressor is listed for LZNT1, but no chunk size; MftRecord._open_wof still
+    # raises NotImplementedError for this format.
+    WOF_COMPRESSION_FORMAT.LZNT1: (lznt1.decompress, None),
+    WOF_COMPRESSION_FORMAT.XPRESS4K: (lzxpress_huffman.decompress, 4096),
+    WOF_COMPRESSION_FORMAT.XPRESS8K: (lzxpress_huffman.decompress, 4096 * 2),
+    WOF_COMPRESSION_FORMAT.XPRESS16K: (lzxpress_huffman.decompress, 4096 * 4),
+    # LZX is not implemented yet, see https://github.com/fox-it/dissect.util/issues/74
+    WOF_COMPRESSION_FORMAT.LZX32K: (None, 4096 * 8),
+}
 
 # Some useful magic numbers and constants
 NTFS_SIGNATURE = b"NTFS    "

diff --git a/dissect/ntfs/mft.py b/dissect/ntfs/mft.py
@@ -15,6 +15,8 @@
     FILE_NUMBER_MFT,
     FILE_NUMBER_ROOT,
     IO_REPARSE_TAG,
+    WOF_COMPRESSION_FORMAT,
+    WOF_DECOMPRESSOR_MAP,
     c_ntfs,
     segment_reference,
 )
@@ -27,6 +29,7 @@
     NotAReparsePointError,
 )
 from dissect.ntfs.index import Index, IndexEntry
+from dissect.ntfs.stream import WofCompressedStream
 from dissect.ntfs.util import AttributeCollection, AttributeMap, apply_fixup
 
 if TYPE_CHECKING:
@@ -331,6 +334,11 @@ def is_mount_point(self) -> bool:
         attr = self.attributes[ATTRIBUTE_TYPE_CODE.REPARSE_POINT]
         return bool(attr) and attr.tag == IO_REPARSE_TAG.MOUNT_POINT
 
+    def is_wof_compressed(self) -> bool:
+        """Return whether this record has a reparse point tagged as WOF (Windows Overlay Filter)."""
+        reparse_point = self.attributes[ATTRIBUTE_TYPE_CODE.REPARSE_POINT]
+        # No reparse point attribute at all means this cannot be a WOF compressed file.
+        if not reparse_point:
+            return False
+        return reparse_point.tag == IO_REPARSE_TAG.WOF
+
     @cached_property
     def reparse_point_name(self) -> str:
         """Return the (printable) name of this reparse point."""
@@ -383,6 +391,23 @@ def _get_stream_attributes(
             raise FileNotFoundError(f"No such stream on record {self}: ({name!r}, {attr_type})")
         return attrs
 
+    def _open_wof(
+        self,
+        name: str = "WofCompressedData",
+        attr_type: ATTRIBUTE_TYPE_CODE = ATTRIBUTE_TYPE_CODE.DATA,
+        allocated: bool = False,
+    ) -> BinaryIO:
+        """Open the WOF backing stream of this record, wrapped for transparent decompression.
+
+        Args:
+            name: The name of the stream that holds the WOF compressed data.
+            attr_type: The attribute type of that stream.
+            allocated: Passed through to the ``open`` call on the stream attributes.
+
+        Returns:
+            The backing stream itself for ``NO_COMPRESSION``, otherwise a :class:`WofCompressedStream` on top of it.
+
+        Raises:
+            FileNotFoundError: If there are no attributes with the given name and type.
+            NotImplementedError: If the compression format is unknown or cannot be decompressed (yet).
+        """
+        # Open the backing stream first, so a missing stream is reported before any format problem.
+        fh = self._get_stream_attributes(name, attr_type).open(allocated)
+        compression_format = self.attributes.REPARSE_POINT.wof_compression_format
+
+        # Compare by value rather than identity: the format is parsed from on-disk data.
+        if compression_format == WOF_COMPRESSION_FORMAT.NO_COMPRESSION:
+            return fh
+
+        # An unrecognised format has no map entry. Fall back to an "unsupported" tuple instead of
+        # unpacking ``None``, which would surface as an unhelpful TypeError.
+        decompressor, chunk_size = WOF_DECOMPRESSOR_MAP.get(compression_format, (None, None))
+        if (
+            decompressor is None
+            or chunk_size is None
+            or compression_format in (WOF_COMPRESSION_FORMAT.LZX32K, WOF_COMPRESSION_FORMAT.LZNT1)
+        ):
+            raise NotImplementedError(f"Compression format not supported for decompression: {compression_format}")
+
+        # The uncompressed size is the size this record reports for its default stream.
+        return WofCompressedStream(fh, 0, fh.size, self.size(), decompressor, chunk_size)
+
     def open(
         self,
         name: str = "",
@@ -399,6 +424,12 @@ def open(
         Raises:
             FileNotFoundError: If there are no attributes with the given name and type.
         """
+
+        # If we explicitly ask for the WofCompressedData stream, we give it back as is.
+        # This way, users can still access the raw (still compressed) stream.
+        if name != "WofCompressedData" and (self.is_reparse_point() and self.is_wof_compressed()):
+            return self._open_wof(attr_type=attr_type, allocated=allocated)
+
         return self._get_stream_attributes(name, attr_type).open(allocated)
 
     def size(

diff --git a/dissect/ntfs/stream.py b/dissect/ntfs/stream.py
@@ -1,10 +1,13 @@
 from __future__ import annotations
 
 import io
+import struct
 from typing import BinaryIO
 
 from dissect.util import lznt1
-from dissect.util.stream import RunlistStream
+from dissect.util.stream import CompressedStream, RunlistStream
+
+DEFAULT_CHUNK_SIZE = 4 * 1024
 
 
 class CompressedRunlistStream(RunlistStream):
@@ -112,3 +115,58 @@ def _read(self, offset: int, length: int) -> bytes:
             cu_block += 1
 
         return b"".join(read_list)
+
+
+class WofCompressedStream(CompressedStream):
+    """Compressed stream for Windows Overlay Filter (WOF) files. This class handles the decompression of WOF compressed
+    files using the specified decompressor.
+
+    WOF compressed files start with a chunk table, which is a list of offsets to the start of each chunk in the
+    compressed data. The first chunk always starts at offset 0 and is therefore not stored in the table. Table
+    entries are little-endian 32-bit integers, or 64-bit integers when the original size exceeds 0xFFFFFFFF.
+    The chunk table is followed by the compressed data itself.
+
+    Supported decompression methods are currently:
+        * LZXPRESS Huffman with 4 KiB chunks (default)
+        * LZXPRESS Huffman with 8 KiB chunks
+        * LZXPRESS Huffman with 16 KiB chunks
+
+    Note that LZX and LZNT1 decompression are not yet supported.
+
+    Args:
+        fh: A file-like object for the compressed data.
+        offset: The offset to the start of the chunk table.
+        size: The size of the compressed data.
+        original_size: The original size of the uncompressed data.
+        decompress: The decompressor function to use.
+        chunk_size: The size of the chunks to read from the compressed data. (default: 4 KiB)
+    """
+
+    def __init__(
+        self,
+        fh: BinaryIO,
+        offset: int,
+        size: int,
+        original_size: int,
+        decompress: callable,
+        chunk_size: int = DEFAULT_CHUNK_SIZE,
+    ):
+        fh.seek(offset)
+
+        # The table holds one entry per chunk except the first one, which implicitly starts at 0.
+        # Clamp at zero so that an empty file (original_size == 0) does not yield a negative repeat
+        # count, which would be an invalid struct format.
+        num_chunks = max((original_size + chunk_size - 1) // chunk_size - 1, 0)
+
+        entry_size = "Q" if original_size > 0xFFFFFFFF else "I"
+        pattern = f"<{num_chunks}{entry_size}"
+        chunks = (0, *struct.unpack(pattern, fh.read(struct.calcsize(pattern))))
+
+        # The compressed data starts right after the chunk table, which is where fh is positioned now.
+        # NOTE(review): size is forwarded unchanged although the data offset moved past the chunk
+        # table - confirm this matches what CompressedStream expects.
+        super().__init__(fh, fh.tell(), size, original_size, decompress, chunk_size, chunks)
+
+    def _read_chunk(self, offset: int, size: int) -> bytes:
+        """Read ``size`` bytes of a single chunk at ``offset`` and return its uncompressed data.
+
+        A chunk whose stored length already equals its expected uncompressed length is returned as is,
+        anything else is passed through the decompressor.
+        """
+        self.fh.seek(self.offset + offset)
+        buf = self.fh.read(size)
+
+        # Every chunk expands to chunk_size bytes, except the last one which holds the remainder.
+        # The bit mask assumes chunk_size is a power of two.
+        # https://github.com/ebiggers/ntfs-3g-system-compression/blob/53f7bcba9c1c54d8e15e87649ed66654474885ab/src/system_compression.c#L556
+        uncompressed_size = (
+            ((self.original_size - 1) & (self.chunk_size - 1)) + 1 if offset == self.chunks[-1] else self.chunk_size
+        )
+
+        # https://github.com/ebiggers/ntfs-3g-system-compression/blob/53f7bcba9c1c54d8e15e87649ed66654474885ab/src/system_compression.c#L570
+        return buf if len(buf) == uncompressed_size else self.decompressor(buf)
diff --git a/tests/data/wof/test16k.txt:WofCompressedData.gz b/tests/data/wof/test16k.txt:WofCompressedData.gz
diff --git a/tests/data/wof/test4k.txt:WofCompressedData.gz b/tests/data/wof/test4k.txt:WofCompressedData.gz
diff --git a/tests/data/wof/test8k.txt:WofCompressedData.gz b/tests/data/wof/test8k.txt:WofCompressedData.gz
diff --git a/tests/data/wof/testlzx.txt:WofCompressedData.gz b/tests/data/wof/testlzx.txt:WofCompressedData.gz
diff --git a/tests/test_attr.py b/tests/test_attr.py
@@ -5,7 +5,7 @@
 import pytest
 
 from dissect.ntfs.attr import Attribute, FileName, StandardInformation
-from dissect.ntfs.c_ntfs import ATTRIBUTE_TYPE_CODE, IO_REPARSE_TAG
+from dissect.ntfs.c_ntfs import ATTRIBUTE_TYPE_CODE, IO_REPARSE_TAG, WOF_COMPRESSION_FORMAT
 from dissect.ntfs.exceptions import VolumeNotAvailableError
 
 
@@ -110,3 +110,45 @@ def test_reparse_point_symlink() -> None:
     assert attr.print_name == "Target"
     assert not attr.absolute
     assert attr.relative
+
+
+@pytest.mark.parametrize(
+    ("attribute", "compression_format"),
+    [
+        (
+            "c00000003000000000000000000007001800000018000000170000801000000001000000020000000100000000000000ffffffff8279471100000000000000000000000000000000",
+            WOF_COMPRESSION_FORMAT.XPRESS4K,
+        ),
+        (
+            "c0000000300000000000000000000a001800000018000000170000801000000001000000020000000100000002000000ffffffff8279471100000000000000000000000000000000",
+            WOF_COMPRESSION_FORMAT.XPRESS8K,
+        ),
+        (
+            "c00000003000000000000000000007001800000018000000170000801000000001000000020000000100000003000000ffffffff82794711ffffffff827947110000000000000000",
+            WOF_COMPRESSION_FORMAT.XPRESS16K,
+        ),
+        (
+            "c0000000300000000000000000000a001800000018000000170000801000000001000000020000000100000001000000ffffffff82794711ffffffff827947110000000000000000",
+            WOF_COMPRESSION_FORMAT.LZX32K,
+        ),
+        (
+            "c0000000300000000000000000000a0018000000180000001700008010000000010000000200000001000000ffffffffffffffff82794711ffffffff827947110000000000000000",
+            WOF_COMPRESSION_FORMAT.LZNT1,
+        ),
+        (
+            "c0000000300000000000000000000a0018000000180000001700008010000000010000000200000001000000feffffffffffffff82794711ffffffff827947110000000000000000",
+            WOF_COMPRESSION_FORMAT.NO_COMPRESSION,
+        ),
+    ],
+)
+def test_reparse_point_wof(attribute: str, compression_format: int) -> None:
+    """Test that a WOF reparse point attribute parses and reports the expected compression format."""
+    attr = Attribute.from_bytes(bytes.fromhex(attribute))
+    assert attr.type == ATTRIBUTE_TYPE_CODE.REPARSE_POINT
+
+    assert attr.tag == IO_REPARSE_TAG.WOF
+    # Any tag other than SYMLINK is reported as absolute and never as relative.
+    assert attr.absolute
+    assert not attr.relative
+
+    # Only SYMLINK and MOUNT_POINT reparse points carry substitute/print names.
+    assert attr.substitute_name is None
+    assert attr.print_name is None
+    assert attr.wof_compression_format == compression_format