2121import threading
2222import time
2323import uuid
24- from collections import deque , namedtuple
24+ from collections import deque
2525from contextlib import contextmanager
2626
2727import sentry_sdk
3535 nanosecond_time ,
3636)
3737
38- RawFrameData = namedtuple (
39- "RawFrameData" , ["abs_path" , "filename" , "function" , "lineno" , "module" ]
40- )
41-
4238if MYPY :
4339 from types import FrameType
4440 from typing import Any
5450 import sentry_sdk .scope
5551 import sentry_sdk .tracing
5652
57- RawStack = Tuple [RawFrameData , ...]
58- RawSample = Sequence [Tuple [str , RawStack ]]
59- RawSampleWithId = Sequence [Tuple [str , int , RawStack ]]
53+ StackId = int
54+
55+ RawFrame = Tuple [
56+ str , # abs_path
57+ Optional [str ], # module
58+ Optional [str ], # filename
59+ str , # function
60+ int , # lineno
61+ ]
62+ RawStack = Tuple [RawFrame , ...]
63+ RawSample = Sequence [Tuple [str , Tuple [StackId , RawStack ]]]
6064
6165 ProcessedStack = Tuple [int , ...]
6266
@@ -155,8 +159,13 @@ def teardown_profiler():
155159MAX_STACK_DEPTH = 128
156160
157161
158- def extract_stack (frame , max_stack_depth = MAX_STACK_DEPTH ):
159- # type: (Optional[FrameType], int) -> Tuple[RawFrameData, ...]
162+ def extract_stack (
163+ frame , # type: Optional[FrameType]
164+ cwd , # type: str
165+ prev_cache = None , # type: Optional[Tuple[StackId, RawStack, Deque[FrameType]]]
166+ max_stack_depth = MAX_STACK_DEPTH , # type: int
167+ ):
168+ # type: (...) -> Tuple[StackId, RawStack, Deque[FrameType]]
160169 """
161170 Extracts the stack starting the specified frame. The extracted stack
162171 assumes the specified frame is the top of the stack, and works back
@@ -166,30 +175,71 @@ def extract_stack(frame, max_stack_depth=MAX_STACK_DEPTH):
166175 only the first `MAX_STACK_DEPTH` frames will be returned.
167176 """
168177
169- stack = deque (maxlen = max_stack_depth ) # type: Deque[FrameType]
178+ frames = deque (maxlen = max_stack_depth ) # type: Deque[FrameType]
170179
171180 while frame is not None :
172- stack .append (frame )
181+ frames .append (frame )
173182 frame = frame .f_back
174183
175- return tuple (extract_frame (frame ) for frame in stack )
184+ if prev_cache is None :
185+ stack = tuple (extract_frame (frame , cwd ) for frame in frames )
186+ else :
187+ _ , prev_stack , prev_frames = prev_cache
188+ prev_depth = len (prev_frames )
189+ depth = len (frames )
190+
191+ # We want to match the frame found in this sample to the frames found in the
192+ # previous sample. If they are the same (using the `is` operator), we can
193+ # skip the expensive work of extracting the frame information and reuse what
194+ # we extracted during the last sample.
195+ #
196+ # Make sure to keep in mind that the stack is ordered from the inner most
197+ # from to the outer most frame so be careful with the indexing.
198+ stack = tuple (
199+ prev_stack [i ]
200+ if i >= 0 and frame is prev_frames [i ]
201+ else extract_frame (frame , cwd )
202+ for i , frame in zip (range (prev_depth - depth , prev_depth ), frames )
203+ )
204+
205+ # Instead of mapping the stack into frame ids and hashing
206+ # that as a tuple, we can directly hash the stack.
207+ # This saves us from having to generate yet another list.
208+ # Additionally, using the stack as the key directly is
209+ # costly because the stack can be large, so we pre-hash
210+ # the stack, and use the hash as the key as this will be
211+ # needed a few times to improve performance.
212+ stack_id = hash (stack )
176213
214+ return stack_id , stack , frames
177215
178- def extract_frame (frame ):
179- # type: (FrameType) -> RawFrameData
216+
217+ def extract_frame (frame , cwd ):
218+ # type: (FrameType, str) -> RawFrame
180219 abs_path = frame .f_code .co_filename
181220
182221 try :
183222 module = frame .f_globals ["__name__" ]
184223 except Exception :
185224 module = None
186225
187- return RawFrameData (
188- abs_path = os .path .abspath (abs_path ),
189- filename = filename_for_module (module , abs_path ) or None ,
190- function = get_frame_name (frame ),
191- lineno = frame .f_lineno ,
192- module = module ,
226+ # namedtuples can be many times slower when initialing
227+ # and accessing attribute so we opt to use a tuple here instead
228+ return (
229+ # This originally was `os.path.abspath(abs_path)` but that had
230+ # a large performance overhead.
231+ #
232+ # According to docs, this is equivalent to
233+ # `os.path.normpath(os.path.join(os.getcwd(), path))`.
234+ # The `os.getcwd()` call is slow here, so we precompute it.
235+ #
236+ # Additionally, since we are using normalized path already,
237+ # we skip calling `os.path.normpath` entirely.
238+ os .path .join (cwd , abs_path ),
239+ module ,
240+ filename_for_module (module , abs_path ) or None ,
241+ get_frame_name (frame ),
242+ frame .f_lineno ,
193243 )
194244
195245
@@ -200,6 +250,8 @@ def get_frame_name(frame):
200250 # we should consider using instead where possible
201251
202252 f_code = frame .f_code
253+ co_varnames = f_code .co_varnames
254+
203255 # co_name only contains the frame name. If the frame was a method,
204256 # the class name will NOT be included.
205257 name = f_code .co_name
@@ -210,8 +262,8 @@ def get_frame_name(frame):
210262 if (
211263 # the co_varnames start with the frame's positional arguments
212264 # and we expect the first to be `self` if its an instance method
213- f_code . co_varnames
214- and f_code . co_varnames [0 ] == "self"
265+ co_varnames
266+ and co_varnames [0 ] == "self"
215267 and "self" in frame .f_locals
216268 ):
217269 for cls in frame .f_locals ["self" ].__class__ .__mro__ :
@@ -226,8 +278,8 @@ def get_frame_name(frame):
226278 if (
227279 # the co_varnames start with the frame's positional arguments
228280 # and we expect the first to be `cls` if its a class method
229- f_code . co_varnames
230- and f_code . co_varnames [0 ] == "cls"
281+ co_varnames
282+ and co_varnames [0 ] == "cls"
231283 and "cls" in frame .f_locals
232284 ):
233285 for cls in frame .f_locals ["cls" ].__mro__ :
@@ -338,13 +390,11 @@ class SampleBuffer(object):
338390 def __init__ (self , capacity ):
339391 # type: (int) -> None
340392
341- self .buffer = [
342- None
343- ] * capacity # type: List[Optional[Tuple[int, RawSampleWithId]]]
393+ self .buffer = [None ] * capacity # type: List[Optional[Tuple[int, RawSample]]]
344394 self .capacity = capacity # type: int
345395 self .idx = 0 # type: int
346396
347- def write (self , ts , raw_sample ):
397+ def write (self , ts , sample ):
348398 # type: (int, RawSample) -> None
349399 """
350400 Writing to the buffer is not thread safe. There is the possibility
@@ -359,40 +409,24 @@ def write(self, ts, raw_sample):
359409 """
360410 idx = self .idx
361411
362- sample = [
363- (
364- thread_id ,
365- # Instead of mapping the stack into frame ids and hashing
366- # that as a tuple, we can directly hash the stack.
367- # This saves us from having to generate yet another list.
368- # Additionally, using the stack as the key directly is
369- # costly because the stack can be large, so we pre-hash
370- # the stack, and use the hash as the key as this will be
371- # needed a few times to improve performance.
372- hash (stack ),
373- stack ,
374- )
375- for thread_id , stack in raw_sample
376- ]
377-
378412 self .buffer [idx ] = (ts , sample )
379413 self .idx = (idx + 1 ) % self .capacity
380414
381415 def slice_profile (self , start_ns , stop_ns ):
382416 # type: (int, int) -> ProcessedProfile
383417 samples = [] # type: List[ProcessedSample]
384- stacks = dict () # type: Dict[int , int]
385- stacks_list = list () # type: List[ProcessedStack]
386- frames = dict () # type: Dict[RawFrameData , int]
387- frames_list = list () # type: List[ProcessedFrame]
418+ stacks = {} # type: Dict[StackId , int]
419+ stacks_list = [] # type: List[ProcessedStack]
420+ frames = {} # type: Dict[RawFrame , int]
421+ frames_list = [] # type: List[ProcessedFrame]
388422
389423 for ts , sample in filter (None , self .buffer ):
390424 if start_ns > ts or ts > stop_ns :
391425 continue
392426
393427 elapsed_since_start_ns = str (ts - start_ns )
394428
395- for tid , hashed_stack , stack in sample :
429+ for tid , ( hashed_stack , stack ) in sample :
396430 # Check if the stack is indexed first, this lets us skip
397431 # indexing frames if it's not necessary
398432 if hashed_stack not in stacks :
@@ -401,11 +435,11 @@ def slice_profile(self, start_ns, stop_ns):
401435 frames [frame ] = len (frames )
402436 frames_list .append (
403437 {
404- "abs_path" : frame . abs_path ,
405- "function " : frame . function or "<unknown>" ,
406- "filename" : frame . filename ,
407- "lineno " : frame . lineno ,
408- "module " : frame . module ,
438+ "abs_path" : frame [ 0 ] ,
439+ "module " : frame [ 1 ] ,
440+ "filename" : frame [ 2 ] ,
441+ "function " : frame [ 3 ] ,
442+ "lineno " : frame [ 4 ] ,
409443 }
410444 )
411445
@@ -439,6 +473,14 @@ def slice_profile(self, start_ns, stop_ns):
439473
440474 def make_sampler (self ):
441475 # type: () -> Callable[..., None]
476+ cwd = os .getcwd ()
477+
478+ # In Python3+, we can use the `nonlocal` keyword to rebind the value,
479+ # but this is not possible in Python2. To get around this, we wrap
480+ # the value in a list to allow updating this value each sample.
481+ last_sample = [
482+ {}
483+ ] # type: List[Dict[int, Tuple[StackId, RawStack, Deque[FrameType]]]]
442484
443485 def _sample_stack (* args , ** kwargs ):
444486 # type: (*Any, **Any) -> None
@@ -447,13 +489,20 @@ def _sample_stack(*args, **kwargs):
447489 This should be called at a regular interval to collect samples.
448490 """
449491
450- self .write (
451- nanosecond_time (),
452- [
453- (str (tid ), extract_stack (frame ))
454- for tid , frame in sys ._current_frames ().items ()
455- ],
456- )
492+ now = nanosecond_time ()
493+ raw_sample = {
494+ tid : extract_stack (frame , cwd , last_sample [0 ].get (tid ))
495+ for tid , frame in sys ._current_frames ().items ()
496+ }
497+
498+ last_sample [0 ] = raw_sample
499+
500+ sample = [
501+ (str (tid ), (stack_id , stack ))
502+ for tid , (stack_id , stack , _ ) in raw_sample .items ()
503+ ]
504+
505+ self .write (now , sample )
457506
458507 return _sample_stack
459508
0 commit comments