Skip to content

remove daemon threads #1483

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 9, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 6 additions & 11 deletions torchdata/nodes/map.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@ def __init__(
max_concurrent: Optional[int],
snapshot_frequency: int,
initial_state: Optional[Dict[str, Any]],
daemonic_reading: bool,
):
self.source = source
self.map_fn = map_fn
Expand All @@ -175,7 +174,6 @@ def __init__(
self.method = method
self.mp_context = mp_context
self.snapshot_frequency = snapshot_frequency
self.daemonic_reading = daemonic_reading

self._in_q: Union[queue.Queue, mp.Queue] = queue.Queue() if method == "thread" else mp_context.Queue()
self._intermed_q: Union[queue.Queue, mp.Queue] = queue.Queue() if method == "thread" else mp_context.Queue()
Expand Down Expand Up @@ -209,7 +207,7 @@ def __init__(
self._stop,
),
name="read_thread(target=_populate_queue)",
daemon=self.daemonic_reading,
daemon=False,
)
self._read_thread.start()

Expand Down Expand Up @@ -249,7 +247,7 @@ def __init__(
self._sort_q,
self._stop,
),
daemon=True,
daemon=False,
name="sort_thread(target=_sort_worker)",
)
self._out_q = self._sort_q
Expand Down Expand Up @@ -352,7 +350,6 @@ def __init__(
multiprocessing_context: Optional[str] = None,
max_concurrent: Optional[int] = None,
snapshot_frequency: int = 1,
daemonic_reading: bool = True,
):
super().__init__()
assert method in ["thread", "process"]
Expand All @@ -371,7 +368,6 @@ def __init__(
raise ValueError(f"{max_concurrent=} should be <= {num_workers=}!")
self.max_concurrent = max_concurrent
self.snapshot_frequency = snapshot_frequency
self.daemonic_reading = daemonic_reading
self._it: Optional[Union[_InlineMapperIter[T], _ParallelMapperIter[T]]] = None

def reset(self, initial_state: Optional[Dict[str, Any]] = None):
Expand Down Expand Up @@ -399,7 +395,6 @@ def _parallel_reset(self, initial_state: Optional[Dict[str, Any]]):
max_concurrent=self.max_concurrent,
snapshot_frequency=self.snapshot_frequency,
initial_state=initial_state,
daemonic_reading=self.daemonic_reading,
)

def next(self) -> T:
Expand Down Expand Up @@ -448,7 +443,6 @@ def __init__(
max_concurrent: Optional[int] = None,
snapshot_frequency: int = 1,
prebatch: Optional[int] = None,
daemonic_reading: bool = True,
):
super().__init__()
assert method in ["thread", "process"]
Expand All @@ -462,7 +456,6 @@ def __init__(
self.max_concurrent = max_concurrent
self.snapshot_frequency = snapshot_frequency
self.prebatch = prebatch
self.daemonic_reading = daemonic_reading
if prebatch is None:
self.map_fn = map_fn
self.source = source
Expand All @@ -481,7 +474,6 @@ def __init__(
multiprocessing_context=self.multiprocessing_context,
max_concurrent=self.max_concurrent,
snapshot_frequency=self.snapshot_frequency,
daemonic_reading=self.daemonic_reading,
)

if self.prebatch is None:
Expand Down Expand Up @@ -581,7 +573,7 @@ def __init__(
self._sem,
self._stop_event,
),
daemon=True,
daemon=False,
name=f"worker_thread(target={self.worker.__name__})",
)
self._thread.start()
Expand All @@ -605,6 +597,7 @@ def __iter__(self) -> Iterator[T]:
def __next__(self) -> T:
while True:
if self._stop_event.is_set():
self._shutdown()
raise StopIteration()
try:
item, idx = self._q.get(block=True, timeout=QUEUE_TIMEOUT)
Expand All @@ -614,11 +607,13 @@ def __next__(self) -> T:
if isinstance(item, StopIteration):
self._sem.release()
self._stop_event.set()
self._shutdown()
raise item
elif isinstance(item, ExceptionWrapper):
if not isinstance(item, StartupExceptionWrapper):
# We don't need to release for startup exceptions
self._sem.release()
self._shutdown()
self._stop_event.set()
item.reraise()
else:
Expand Down
Loading