|
1 | 1 | import os |
2 | 2 | import sys |
| 3 | +import datetime as dt |
3 | 4 |
|
4 | 5 | sys.path.insert(1, os.path.join(sys.path[0], "..")) |
5 | 6 |
|
6 | 7 | from orso.dataframe import DataFrame |
| 8 | +from orso.types import get_orso_type, OrsoTypes |
7 | 9 | from tests import cities |
8 | 10 | import re |
9 | 11 | from typing import List |
10 | 12 |
|
11 | 13 | lengths = { |
12 | | - 0: 5, |
13 | | - 1: 6, |
14 | | - 2: 7, |
15 | | - 3: 8, |
16 | | - 4: 9, |
17 | | - 5: 10, |
18 | | - 6: 11, |
19 | | - 7: 12, |
20 | | - 8: 12, |
21 | | - 9: 12, |
22 | | - 10: 12, |
| 14 | + 0: 6, # key: row count passed to head(); value: expected number of lines in display() output, footer line included |
| 15 | + 1: 7, |
| 16 | + 2: 8, |
| 17 | + 3: 9, |
| 18 | + 4: 10, |
| 19 | + 5: 11, |
| 20 | + 6: 12, |
| 21 | + 7: 13, |
| 22 | + 8: 13, |
| 23 | + 9: 13, |
| 24 | + 10: 13, |
23 | 25 | } |
24 | 26 |
|
25 | 27 |
|
@@ -56,10 +58,177 @@ def test_display_ascii_greedy(): |
56 | 58 | df = DataFrame(cities.values).head(i) |
57 | 59 | df.materialize() |
58 | 60 |
|
59 | | - ascii = df.display(limit=3, show_types=True) |
60 | | - |
61 | | - assert len(ascii.split("\n")) == lengths[i], i |
62 | | - assert len(find_all_substrings(ascii, "Tokyo")) == (1 if i != 0 else 0) |
| 61 | + ascii_output = df.display(limit=3, show_types=True) |
| 62 | + |
| 63 | + assert len(ascii_output.split("\n")) == lengths[i], i |
| 64 | + assert len(find_all_substrings(ascii_output, "Tokyo")) == (1 if i != 0 else 0) |
| 65 | + |
| 66 | + |
def test_row_count_footer_single_row():
    """Check the footer text rendered for a DataFrame holding one row and two columns."""
    single_record = [{"a": 1, "b": 2}]
    rendered = DataFrame(single_record).display()

    # The footer reports the shape of the frame as "[ <rows> rows x <cols> columns ]".
    expected_footer = "[ 1 rows x 2 columns ]"
    assert expected_footer in rendered
| 73 | + |
| 74 | + |
def test_row_count_footer_multiple_rows():
    """Check the footer text rendered for a DataFrame holding ten rows and two columns."""
    records = [{"a": n, "b": n * 2} for n in range(10)]
    rendered = DataFrame(records).display()

    # All ten rows are expected to be counted in the footer.
    expected_footer = "[ 10 rows x 2 columns ]"
    assert expected_footer in rendered
| 82 | + |
| 83 | + |
def test_row_count_footer_lazy_dataframe():
    """Check the footer row count when display() is limited to fewer rows than the frame holds."""
    # NOTE(review): the input below is an eager list, not a generator, so the
    # "lazy" case named in the test title is not visibly exercised - confirm
    # whether DataFrame should be built from a generator here.
    records = [{"a": n, "b": n * 2} for n in range(50)]
    rendered = DataFrame(records).display(limit=5)

    # The test expects display() to render 5 head rows plus 5 tail rows, and
    # the footer to count those 10 displayed rows rather than the 50 supplied.
    expected_footer = "[ 10 rows x 2 columns ]"
    assert expected_footer in rendered
| 93 | + |
| 94 | + |
def test_row_indices_consistency():
    """Check that the index column of a limited display() runs 1-5 and then 6-10."""
    frame = DataFrame([{"a": n, "b": n * 2} for n in range(20)])
    rendered = frame.display(limit=5)

    # Drop ANSI escape sequences so each table cell can be parsed as plain text.
    plain_text = re.sub(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])', '', rendered)

    # Glyphs that mark border/separator lines rather than data rows.
    border_glyphs = ("─", "═", "┌")

    row_indices = []
    for text_line in plain_text.split("\n"):
        # Only lines containing a vertical bar can be table rows.
        if "│" not in text_line:
            continue
        if any(glyph in text_line for glyph in border_glyphs):
            continue
        cells = text_line.split("│")
        if len(cells) <= 1:
            continue
        # The first cell after the leading bar holds the row index; rows whose
        # first cell is not an integer (e.g. header rows) are ignored.
        try:
            row_indices.append(int(cells[1].strip()))
        except (ValueError, IndexError):
            continue

    # The test expects head rows numbered 1-5 followed by tail rows numbered
    # 6-10, i.e. the tail is numbered by display position, not source position.
    assert len(row_indices) >= 10, f"Expected at least 10 row indices, got {len(row_indices)}"
    assert row_indices[:5] == [1, 2, 3, 4, 5], f"Expected [1,2,3,4,5], got {row_indices[:5]}"
    assert row_indices[-5:] == [6, 7, 8, 9, 10], f"Expected [6,7,8,9,10], got {row_indices[-5:]}"
| 126 | + |
| 127 | + |
def test_interval_formatting_from_array():
    """Check that display() copes with a two-element, interval-like array value."""
    # The pair mimics a [days, microseconds] interval: 36000000000 microseconds
    # is 10 hours. Both elements are supplied as strings.
    record = {"interval": ["0", "36000000000"]}
    rendered = DataFrame([record]).display()

    # NOTE(review): the only thing asserted is that the text "interval" (the
    # column name) appears in the output; the formatting of the value itself is
    # not checked - confirm whether a stronger assertion is wanted.
    assert "interval" in rendered
| 141 | + |
| 142 | + |
def test_get_orso_type_parser_simple_types():
    """Check that get_orso_type maps plain type names to the matching OrsoTypes member."""
    expectations = (
        ("INTEGER", OrsoTypes.INTEGER),
        ("VARCHAR", OrsoTypes.VARCHAR),
        ("DOUBLE", OrsoTypes.DOUBLE),
        ("BOOLEAN", OrsoTypes.BOOLEAN),
        ("DATE", OrsoTypes.DATE),
        ("TIMESTAMP", OrsoTypes.TIMESTAMP),
        ("INTERVAL", OrsoTypes.INTERVAL),
    )
    for type_name, expected_type in expectations:
        assert get_orso_type(type_name) == expected_type
| 152 | + |
| 153 | + |
def test_get_orso_type_parser_complex_types():
    """Check that parameterised type strings resolve to their base OrsoTypes member."""
    # Each input carries an element type, a length or a precision/scale suffix;
    # the comparison is against the base type only.
    expectations = (
        ("ARRAY<INTEGER>", OrsoTypes.ARRAY),
        ("ARRAY<VARCHAR>", OrsoTypes.ARRAY),
        ("VARCHAR[255]", OrsoTypes.VARCHAR),
        ("DECIMAL(10,2)", OrsoTypes.DECIMAL),
        ("BLOB[1024]", OrsoTypes.BLOB),
    )
    for type_string, expected_type in expectations:
        assert get_orso_type(type_string) == expected_type
| 161 | + |
| 162 | + |
def test_get_orso_type_parser_case_insensitive():
    """Check that get_orso_type resolves a type name regardless of letter case."""
    integer_spellings = ("integer", "INTEGER", "InTeGeR")
    array_spellings = ("array<integer>", "ARRAY<INTEGER>")

    for spelling in integer_spellings:
        assert get_orso_type(spelling) == OrsoTypes.INTEGER
    for spelling in array_spellings:
        assert get_orso_type(spelling) == OrsoTypes.ARRAY
| 170 | + |
| 171 | + |
def test_get_orso_type_parser_invalid_type():
    """Check that an unrecognised type name raises ValueError mentioning "Unknown"."""
    try:
        get_orso_type("INVALID_TYPE")
    except ValueError as error:
        # The error text is expected to identify the type as unknown.
        assert "Unknown" in str(error)
    else:
        # Reaching this branch means no exception was raised at all.
        assert False, "Should have raised ValueError"
| 179 | + |
| 180 | + |
def test_display_with_mixed_data_types():
    """Check that display() renders a row mixing int, float, str, bool and date values."""
    record = {
        "int_col": 42,
        "float_col": 3.14,
        "str_col": "hello",
        "bool_col": True,
        "date_col": dt.date(2025, 10, 19),
    }
    rendered = DataFrame([record]).display()

    # Every column name must show up in the rendered table.
    for column_name in record:
        assert column_name in rendered

    # The values are checked by their expected textual form; the boolean is
    # not checked because its rendering is not pinned by this test.
    for expected_text in ("42", "3.14", "hello", "2025-10-19"):
        assert expected_text in rendered
| 205 | + |
| 206 | + |
def test_display_with_null_values():
    """Check that None cells are rendered as "null" and both rows are counted."""
    records = [
        {"a": 1, "b": None},
        {"a": None, "b": 2},
    ]
    rendered = DataFrame(records).display()

    # None is expected to appear as the literal text "null".
    assert "null" in rendered
    assert "2 rows" in rendered
| 219 | + |
| 220 | + |
def test_display_footer_rows_columns_format():
    """Check the "[ N rows x M columns ]" footer text and that it sits on the last line."""
    records = [{"x": n, "y": n * 2, "z": n * 3} for n in range(5)]
    rendered = DataFrame(records).display()

    expected_footer = "[ 5 rows x 3 columns ]"
    assert expected_footer in rendered

    # Splitting once from the right isolates whatever follows the final newline.
    final_line = rendered.rsplit("\n", 1)[-1]
    assert expected_footer in final_line
63 | 232 |
|
64 | 233 |
|
65 | 234 |
|
|
0 commit comments