Skip to content

Commit 4ff07ed

Browse files
committed
Add function to get column specifications of df
1 parent 4c3ba5b commit 4ff07ed

File tree

1 file changed

+25
-2
lines changed

1 file changed

+25
-2
lines changed

asreviewcontrib/preprocess/io/io_utils.py

Lines changed: 25 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,12 @@
22

33
import numpy as np
44
import pandas as pd
5-
from asreview.io.utils import _standardize_dataframe
6-
from asreviewcontrib.preprocess.config import COLS_FOR_DEDUPE
5+
from asreview.config import COLUMN_DEFINITIONS
6+
from asreview.io.utils import _standardize_dataframe, type_from_column
7+
from asreviewcontrib.preprocess.config import (
8+
COLS_FOR_DEDUPE,
9+
DEDUPLICATION_COLUMN_DEFINITIONS,
10+
)
711

812

913
def _standardize_dataframe_for_deduplication(df, column_spec={}):
@@ -70,3 +74,22 @@ def _standardize_dataframe_for_deduplication(df, column_spec={}):
7074
)
7175

7276
return df, all_column_spec
77+
78+
79+
def _get_column_spec(df):
    """Build a mapping from recognised data type to column name for ``df``.

    Each column name obtained by iterating over ``df`` is looked up with
    ``type_from_column``, first against ``DEDUPLICATION_COLUMN_DEFINITIONS``
    and, only when that lookup yields ``None``, against
    ``COLUMN_DEFINITIONS``. Columns for which both lookups yield ``None``
    are left out of the result.

    Parameters
    ----------
    df
        Object whose iteration yields column names (presumably a
        ``pandas.DataFrame`` -- confirm against callers).

    Returns
    -------
    dict
        ``{data_type: column_name}``. When several columns resolve to the
        same data type, the column encountered last wins.
    """
    # Order matters: the deduplication-specific table is consulted first.
    definition_tables = (
        DEDUPLICATION_COLUMN_DEFINITIONS,
        COLUMN_DEFINITIONS,
    )

    spec = {}
    for name in df:
        for definitions in definition_tables:
            resolved = type_from_column(name, definitions)
            if resolved is not None:
                spec[resolved] = name
                # First table that recognises the column decides its type.
                break
    return spec

0 commit comments

Comments
 (0)