Skip to content

chore: add exec_seconds attribute to ExecutionMetrics #1104

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Nov 6, 2024
31 changes: 15 additions & 16 deletions bigframes/session/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,48 +29,47 @@ class ExecutionMetrics:
execution_count: int = 0
slot_millis: int = 0
bytes_processed: int = 0
execution_secs: float = 0

def count_job_stats(self, query_job: bq_job.QueryJob):
stats = get_performance_stats(query_job)
if stats is not None:
bytes_processed, slot_millis, exec_seconds = stats
bytes_processed, slot_millis, execution_secs = stats
self.execution_count += 1
self.bytes_processed += bytes_processed
self.slot_millis += slot_millis
self.execution_secs += execution_secs
if LOGGING_NAME_ENV_VAR in os.environ:
# when running notebooks via pytest nbmake
write_stats_to_disk(bytes_processed, slot_millis, exec_seconds)
write_stats_to_disk(bytes_processed, slot_millis, execution_secs)


def get_performance_stats(
query_job: bigquery.QueryJob,
) -> Optional[Tuple[int, int, Optional[float]]]:
) -> Optional[Tuple[int, int, float]]:
"""Parse the query job for performance stats.

Return None if the stats do not reflect real work done in bigquery.
"""

if (
query_job.configuration.dry_run
or query_job.created is None
or query_job.ended is None
):
return None

bytes_processed = query_job.total_bytes_processed
if not isinstance(bytes_processed, int):
return None # filter out mocks
if query_job.configuration.dry_run:
# dry run stats are just predictions of the real run
bytes_processed = 0

slot_millis = query_job.slot_millis
if not isinstance(slot_millis, int):
return None # filter out mocks

if query_job.configuration.dry_run:
# dry run stats are just predictions of the real run
slot_millis = 0

exec_seconds = (
(query_job.ended - query_job.created).total_seconds()
if query_job.created is not None and query_job.ended is not None
else None
)
execution_secs = (query_job.ended - query_job.created).total_seconds()

return bytes_processed, slot_millis, exec_seconds
return bytes_processed, slot_millis, execution_secs


def write_stats_to_disk(
Expand Down