Skip to content

Commit 5d8dc04

Browse files
committed
chore: update logic for row metric
1 parent b7ca04a commit 5d8dc04

File tree

2 files changed

+20
-17
lines changed

2 files changed

+20
-17
lines changed

src/ydata_profiling/utils/common.py

Lines changed: 17 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -148,18 +148,22 @@ def calculate_nrows(df):
148148
149149
Returns: int, approximate number of rows
150150
"""
151-
try:
152-
n_partitions = df.rdd.getNumPartitions()
153-
154-
nrows = (
155-
df.rdd.mapPartitionsWithIndex(
156-
lambda idx, partition: [sum(1 for _ in partition)] if idx == 0 else [0]
157-
).collect()[0]
158-
* n_partitions
159-
)
160-
except:
161-
nrows = (
162-
0 # returns 0 in case it was not possible to compute it from the partition
163-
)
151+
if isinstance(df, pd.DataFrame):
152+
if df is not None:
153+
nrows = len(df)
154+
else:
155+
nrows = 0
156+
else:
157+
try:
158+
n_partitions = df.rdd.getNumPartitions()
159+
160+
nrows = (
161+
df.rdd.mapPartitionsWithIndex(
162+
lambda idx, partition: [sum(1 for _ in partition)] if idx == 0 else [0]
163+
).collect()[0]
164+
* n_partitions
165+
)
166+
except:
167+
nrows = 0
164168

165169
return nrows

src/ydata_profiling/utils/logger.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -22,19 +22,18 @@ def info_def_report(self, df, timeseries: bool) -> None: # noqa: ANN001
2222
ncols = len(df.columns)
2323
except AttributeError:
2424
ncols=0
25-
25+
26+
nrows = calculate_nrows(df)
27+
2628
if isinstance(df, pd.DataFrame):
2729
dataframe = "pandas"
2830
report_type = "regular"
29-
nrows = len(df)
3031
elif df is None:
3132
dataframe = "pandas"
3233
report_type = "compare"
33-
nrows = len(df)
3434
else:
3535
dataframe = "spark"
3636
report_type = "regular"
37-
nrows = calculate_nrows(df)
3837

3938
dbx = is_running_in_databricks()
4039
datatype = "timeseries" if timeseries else "tabular"

0 commit comments

Comments
 (0)