Skip to content

Commit 3d2f582

Browse files
authored
[MAINTENANCE] SimpleDateTimeFormatStringParameterBuilder and general RBP example config updates (great-expectations#4304)
* Add a few missing formats
* Refactor imports of DEFAULT_CANDIDATE_STRINGS and return the values in parameter builder details
* Refactor so config parameters aren't reused
* Fix bug in how candidate strings were instantiated
* Update existing tests with candidate strings in details section
1 parent a905de9 commit 3d2f582

File tree

5 files changed

+261
-232
lines changed

5 files changed

+261
-232
lines changed

great_expectations/rule_based_profiler/parameter_builder/simple_date_format_string_parameter_builder.py

Lines changed: 67 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,68 @@
1515

1616
logger = logging.getLogger(__name__)
1717

18+
DEFAULT_CANDIDATE_STRINGS: Set[str] = {
19+
"%H:%M:%S",
20+
"%H:%M:%S,%f",
21+
"%H:%M:%S.%f",
22+
"%Y %b %d %H:%M:%S.%f",
23+
"%Y %b %d %H:%M:%S.%f %Z",
24+
"%Y %b %d %H:%M:%S.%f*%Z",
25+
"%Y%m%d %H:%M:%S.%f",
26+
"%Y-%m-%d",
27+
"%Y-%m-%d %H:%M:%S",
28+
"%Y-%m-%d %H:%M:%S %z",
29+
"%Y-%m-%d %H:%M:%S%z",
30+
"%Y-%m-%d %H:%M:%S,%f",
31+
"%Y-%m-%d %H:%M:%S,%f%z",
32+
"%Y-%m-%d %H:%M:%S.%f",
33+
"%Y-%m-%d %H:%M:%S.%f%z",
34+
"%Y-%m-%d'T'%H:%M:%S",
35+
"%Y-%m-%d'T'%H:%M:%S%z",
36+
"%Y-%m-%d'T'%H:%M:%S'%z'",
37+
"%Y-%m-%d'T'%H:%M:%S.%f",
38+
"%Y-%m-%d'T'%H:%M:%S.%f'%z'",
39+
"%Y-%m-%d*%H:%M:%S",
40+
"%Y-%m-%d*%H:%M:%S:%f",
41+
"%Y-%m-%dT%z",
42+
"%Y/%m/%d",
43+
"%Y/%m/%d*%H:%M:%S",
44+
"%b %d %H:%M:%S",
45+
"%b %d %H:%M:%S %Y",
46+
"%b %d %H:%M:%S %z",
47+
"%b %d %H:%M:%S %z %Y",
48+
"%b %d %Y %H:%M:%S",
49+
"%b %d, %Y %H:%M:%S %p",
50+
"%d %b %Y %H:%M:%S",
51+
"%d %b %Y %H:%M:%S*%f",
52+
"%d-%b-%Y %H:%M:%S",
53+
"%d-%b-%Y %H:%M:%S.%f",
54+
"%d-%m-%Y",
55+
"%d/%b %H:%M:%S,%f",
56+
"%d/%b/%Y %H:%M:%S",
57+
"%d/%b/%Y:%H:%M:%S",
58+
"%d/%b/%Y:%H:%M:%S %z",
59+
"%d/%m/%Y",
60+
"%m%d_%H:%M:%S",
61+
"%m%d_%H:%M:%S.%f",
62+
"%m-%d-%Y",
63+
"%m/%d/%Y",
64+
"%m/%d/%Y %H:%M:%S %p",
65+
"%m/%d/%Y %H:%M:%S %p:%f",
66+
"%m/%d/%Y %H:%M:%S %z",
67+
"%m/%d/%Y*%H:%M:%S",
68+
"%m/%d/%Y*%H:%M:%S*%f",
69+
"%m/%d/%y %H:%M:%S %z",
70+
"%m/%d/%y*%H:%M:%S",
71+
"%y%m%d %H:%M:%S",
72+
"%y-%m-%d",
73+
"%y-%m-%d %H:%M:%S",
74+
"%y-%m-%d %H:%M:%S,%f",
75+
"%y-%m-%d %H:%M:%S,%f %z",
76+
"%y/%m/%d",
77+
"%y/%m/%d %H:%M:%S",
78+
}
79+
1880

1981
class SimpleDateFormatStringParameterBuilder(ParameterBuilder):
2082
"""
@@ -23,63 +85,6 @@ class SimpleDateFormatStringParameterBuilder(ParameterBuilder):
2385
has the lowest unexpected_count ratio.
2486
"""
2587

26-
CANDIDATE_STRINGS: Set[str] = {
27-
"%Y-%m-%d",
28-
"%m-%d-%Y",
29-
"%y-%m-%d",
30-
"%Y-%m-%dT%z",
31-
"%Y-%m-%d %H:%M:%S",
32-
"%Y %b %d %H:%M:%S.%f %Z",
33-
"%b %d %H:%M:%S %z %Y",
34-
"%d/%b/%Y:%H:%M:%S %z",
35-
"%b %d, %Y %H:%M:%S %p",
36-
"%b %d %Y %H:%M:%S",
37-
"%b %d %H:%M:%S %Y",
38-
"%b %d %H:%M:%S %z",
39-
"%b %d %H:%M:%S",
40-
"%Y-%m-%d'T'%H:%M:%S%z",
41-
"%Y-%m-%d'T'%H:%M:%S.%f'%z'",
42-
"%Y-%m-%d %H:%M:%S %z",
43-
"%Y-%m-%d %H:%M:%S%z",
44-
"%Y-%m-%d %H:%M:%S,%f",
45-
"%Y/%m/%d*%H:%M:%S",
46-
"%Y %b %d %H:%M:%S.%f*%Z",
47-
"%Y %b %d %H:%M:%S.%f",
48-
"%Y-%m-%d %H:%M:%S,%f%z",
49-
"%Y-%m-%d %H:%M:%S.%f",
50-
"%Y-%m-%d %H:%M:%S.%f%z",
51-
"%Y-%m-%d'T'%H:%M:%S.%f",
52-
"%Y-%m-%d'T'%H:%M:%S",
53-
"%Y-%m-%d'T'%H:%M:%S'%z'",
54-
"%Y-%m-%d*%H:%M:%S:%f",
55-
"%Y-%m-%d*%H:%M:%S",
56-
"%y-%m-%d %H:%M:%S,%f %z",
57-
"%y-%m-%d %H:%M:%S,%f",
58-
"%y-%m-%d %H:%M:%S",
59-
"%y/%m/%d %H:%M:%S",
60-
"%y%m%d %H:%M:%S",
61-
"%Y%m%d %H:%M:%S.%f",
62-
"%m/%d/%y*%H:%M:%S",
63-
"%m/%d/%Y*%H:%M:%S",
64-
"%m/%d/%Y*%H:%M:%S*%f",
65-
"%m/%d/%y %H:%M:%S %z",
66-
"%m/%d/%Y %H:%M:%S %z",
67-
"%H:%M:%S",
68-
"%H:%M:%S.%f",
69-
"%H:%M:%S,%f",
70-
"%d/%b %H:%M:%S,%f",
71-
"%d/%b/%Y:%H:%M:%S",
72-
"%d/%b/%Y %H:%M:%S",
73-
"%d-%b-%Y %H:%M:%S",
74-
"%d-%b-%Y %H:%M:%S.%f",
75-
"%d %b %Y %H:%M:%S",
76-
"%d %b %Y %H:%M:%S*%f",
77-
"%m%d_%H:%M:%S",
78-
"%m%d_%H:%M:%S.%f",
79-
"%m/%d/%Y %H:%M:%S %p:%f",
80-
"%m/%d/%Y %H:%M:%S %p",
81-
}
82-
8388
def __init__(
8489
self,
8590
name: str,
@@ -117,7 +122,10 @@ def __init__(
117122

118123
self._threshold = threshold
119124

120-
self._candidate_strings = candidate_strings
125+
if candidate_strings is not None and isinstance(candidate_strings, list):
126+
self._candidate_strings = set(candidate_strings)
127+
else:
128+
self._candidate_strings = DEFAULT_CANDIDATE_STRINGS
121129

122130
@property
123131
def fully_qualified_parameter_name(self) -> str:
@@ -193,10 +201,6 @@ def _build_parameters(
193201
variables=variables,
194202
parameters=parameters,
195203
)
196-
if candidate_strings is not None and isinstance(candidate_strings, list):
197-
candidate_strings = set(candidate_strings)
198-
else:
199-
candidate_strings = SimpleDateFormatStringParameterBuilder.CANDIDATE_STRINGS
200204

201205
# Gather "metric_value_kwargs" for all candidate "strftime_format" strings.
202206
fmt_string: str
@@ -264,5 +268,6 @@ def _build_parameters(
264268
best_fmt_string,
265269
{
266270
"success_ratio": best_ratio,
271+
"candidate_strings": sorted(candidate_strings),
267272
},
268273
)

0 commit comments

Comments
 (0)