Skip to content

Commit b148aa2

Browse files
author
Shinichi Takii
authored
Merge pull request shinichi-takii#27 from shinichi-takii/feature/fix-postgres-data-type
Add support for PostgreSQL data types and Python 3.7
2 parents c88cc8f + bcecb85 commit b148aa2

File tree

9 files changed

+277
-85
lines changed

9 files changed

+277
-85
lines changed

.travis.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
language: python
2+
dist: xenial
3+
sudo: true
24

35
python:
46
- "3.4"
57
- "3.5"
68
- "3.6"
9+
- "3.7"
710

811
# command to install dependencies
912
install:

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
# Changelog
22

3+
## 1.2.1
4+
- Add supports for Python 3.7.
5+
- Pass Python 3.7 test.
6+
- Add supports PostgreSQL data type.
7+
- `CHARACTER VARYING`
8+
- `JSON`
9+
- Array type
10+
- Fix parse `DEFAULT` value.
11+
- Add decimal point to `DEFAULT` parse character.
12+
313
## 1.2.0
414
- Add `DdlParseTable.to_bigquery_ddl` function.
515
- BigQuery DDL (CREATE TABLE) statement generate function.

LICENSE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
BSD 3-Clause License
22

3-
Copyright (c) 2018, Shinichi Takii
3+
Copyright (c) 2019, Shinichi Takii
44
All rights reserved.
55

66
Redistribution and use in source and binary forms, with or without

ddlparse/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from .ddlparse import *
99

1010
__copyright__ = 'Copyright (C) 2018-2019 Shinichi Takii'
11-
__version__ = '1.2.0'
11+
__version__ = '1.2.1'
1212
__license__ = 'BSD-3-Clause'
1313
__author__ = 'Shinichi Takii'
1414
__author_email__ = '[email protected]'

ddlparse/ddlparse.py

Lines changed: 90 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
"""Parse DDL statements"""
99

10-
import re
10+
import re, textwrap, json
1111
from collections import OrderedDict
1212
from enum import IntEnum
1313

@@ -74,16 +74,19 @@ def get_name(self, name_case=DdlParseBase.NAME_CASE.original):
7474
class DdlParseColumn(DdlParseTableColumnBase):
7575
"""Column define info"""
7676

77-
def __init__(self, name, data_type_array, constraint=None, source_database=None):
77+
def __init__(self, name, data_type_array, array_brackets=None, constraint=None, source_database=None):
7878
"""
7979
:param data_type_array[]: Column data type ['data type name'] or ['data type name', '(length)'] or ['data type name', '(precision, scale)']
80+
:param array_brackets: Column array brackets string '[]' or '[][]...'
8081
:param constraint: Column constraint string
8182
:param source_database: enum DdlParse.DATABASE
8283
"""
84+
8385
super().__init__(source_database)
8486
self._name = name
8587
self._set_data_type(data_type_array)
8688
self.constraint = constraint
89+
self._array_dimensional = 0 if array_brackets is None else array_brackets.count('[]')
8790

8891
@property
8992
def data_type(self):
@@ -109,11 +112,12 @@ def _set_data_type(self, data_type_array):
109112
if len(data_type_array) < 2:
110113
return
111114

112-
matches = re.findall(r"(\d+)\s*,*\s*(\d*)", data_type_array[1])
115+
matches = re.findall(r"(\d+)\s*,*\s*(\d*)", data_type_array[-1])
113116
if len(matches) > 0:
114117
self._length = int(matches[0][0])
115118
self._scale = None if len(matches[0]) < 2 or matches[0][1] == "" or int(matches[0][1]) == 0 else int(matches[0][1])
116-
else:
119+
120+
if re.search(r"^\D+", data_type_array[1]):
117121
self._data_type += " {}".format(data_type_array[1])
118122

119123

@@ -136,6 +140,11 @@ def constraint(self, constraint):
136140
self._pk = False if self._constraint is None or not re.search("PRIMARY KEY", self._constraint) else True
137141
self._unique = False if self._constraint is None or not re.search("UNIQUE", self._constraint) else True
138142

143+
@property
144+
def array_dimensional(self):
145+
"""array dimensional number"""
146+
return self._array_dimensional
147+
139148
@property
140149
def not_null(self):
141150
return self._not_null
@@ -166,7 +175,7 @@ def bigquery_data_type(self):
166175

167176
# BigQuery data type = {source_database: [data type, ...], ...}
168177
BQ_DATA_TYPE_DIC = OrderedDict()
169-
BQ_DATA_TYPE_DIC["STRING"] = {None: [re.compile(r"(CHAR|TEXT|CLOB)")]}
178+
BQ_DATA_TYPE_DIC["STRING"] = {None: [re.compile(r"(CHAR|TEXT|CLOB|JSON)")]}
170179
BQ_DATA_TYPE_DIC["INTEGER"] = {None: [re.compile(r"INT|SERIAL|YEAR")]}
171180
BQ_DATA_TYPE_DIC["FLOAT"] = {None: [re.compile(r"(FLOAT|DOUBLE)"), "REAL", "MONEY"]}
172181
BQ_DATA_TYPE_DIC["DATETIME"] = {
@@ -231,12 +240,48 @@ def bigquery_standard_data_type(self):
231240
def bigquery_mode(self):
232241
"""Get BigQuery constraint"""
233242

234-
return "REQUIRED" if self.not_null else "NULLABLE"
243+
if self.array_dimensional > 0:
244+
return "REPEATED"
245+
elif self.not_null:
246+
return "REQUIRED"
247+
else:
248+
return "NULLABLE"
235249

236250
def to_bigquery_field(self, name_case=DdlParseBase.NAME_CASE.original):
237251
"""Generate BigQuery JSON field define"""
238252

239-
return '{{"name": "{}", "type": "{}", "mode": "{}"}}'.format(self.get_name(name_case), self.bigquery_data_type, self.bigquery_mode)
253+
col_name = self.get_name(name_case)
254+
mode = self.bigquery_mode
255+
256+
if self.array_dimensional <= 1:
257+
# no or one dimensional array data type
258+
type = self.bigquery_legacy_data_type
259+
260+
else:
261+
# multiple dimensional array data type
262+
type = "RECORD"
263+
264+
fields = OrderedDict()
265+
fields_cur = fields
266+
267+
for i in range(1, self.array_dimensional):
268+
is_last = True if i == self.array_dimensional - 1 else False
269+
270+
fields_cur['fields'] = [OrderedDict()]
271+
fields_cur = fields_cur['fields'][0]
272+
273+
fields_cur['name'] = "dimension_{}".format(i)
274+
fields_cur['type'] = self.bigquery_legacy_data_type if is_last else "RECORD"
275+
fields_cur['mode'] = self.bigquery_mode if is_last else "REPEATED"
276+
277+
col = OrderedDict()
278+
col['name'] = col_name
279+
col['type'] = type
280+
col['mode'] = mode
281+
if self.array_dimensional > 1:
282+
col['fields'] = fields['fields']
283+
284+
return json.dumps(col)
240285

241286

242287
class DdlParseColumnDict(OrderedDict, DdlParseBase):
@@ -258,11 +303,11 @@ def __getitem__(self, key):
258303
def __setitem__(self, key, value):
259304
super().__setitem__(key.lower(), value)
260305

261-
def append(self, column_name, data_type_array=None, constraint=None, source_database=None):
306+
def append(self, column_name, data_type_array=None, array_brackets=None, constraint=None, source_database=None):
262307
if source_database is None:
263308
source_database = self.source_database
264309

265-
column = DdlParseColumn(column_name, data_type_array, constraint, source_database)
310+
column = DdlParseColumn(column_name, data_type_array, array_brackets, constraint, source_database)
266311
self.__setitem__(column_name, column)
267312
return column
268313

@@ -366,18 +411,42 @@ def to_bigquery_ddl(self, name_case=DdlParseBase.NAME_CASE.original):
366411
else:
367412
dataset = self.schema
368413

369-
cols_def = []
414+
cols_defs = []
370415
for col in self.columns.values():
371-
cols_def.append("{name} {type}{not_null}".format(
372-
name=col.get_name(name_case),
373-
type=col.bigquery_standard_data_type,
374-
not_null=" NOT NULL" if col.not_null else "",
416+
col_name = col.get_name(name_case)
417+
418+
if col.array_dimensional < 1:
419+
# no array data type
420+
type = col.bigquery_standard_data_type
421+
not_null = " NOT NULL" if col.not_null else ""
422+
423+
else:
424+
# one or multiple dimensional array data type
425+
type_front = "ARRAY<"
426+
type_back = ">"
427+
for i in range(1, col.array_dimensional):
428+
type_front += "STRUCT<dimension_{} ARRAY<".format(i)
429+
type_back += ">>"
430+
431+
type = "{}{}{}".format(type_front, col.bigquery_standard_data_type, type_back)
432+
not_null = ""
433+
434+
cols_defs.append("{name} {type}{not_null}".format(
435+
name=col_name,
436+
type=type,
437+
not_null=not_null,
375438
))
376439

377-
return "#standardSQL\nCREATE TABLE `project.{dataset}.{table}`\n(\n {colmns_define}\n)".format(
440+
return textwrap.dedent(
441+
"""\
442+
#standardSQL
443+
CREATE TABLE `project.{dataset}.{table}`
444+
(
445+
{colmns_define}
446+
)""").format(
378447
dataset=dataset,
379448
table=self.get_name(name_case),
380-
colmns_define=",\n ".join(cols_def),
449+
colmns_define=",\n ".join(cols_defs),
381450
)
382451

383452

@@ -411,10 +480,11 @@ class DdlParse(DdlParseBase):
411480
+ Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_")("name") + Optional(_SUPPRESS_QUOTE)
412481
+ Group(
413482
Word(alphanums+"_")
414-
+ Optional(CaselessKeyword("WITHOUT TIME ZONE") ^ CaselessKeyword("WITH TIME ZONE") ^ CaselessKeyword("PRECISION"))
483+
+ Optional(CaselessKeyword("WITHOUT TIME ZONE") ^ CaselessKeyword("WITH TIME ZONE") ^ CaselessKeyword("PRECISION") ^ CaselessKeyword("VARYING"))
415484
+ Optional(_LPAR + Regex(r"\d+\s*,*\s*\d*") + Optional(Suppress(_CHAR_SEMANTICS | _BYTE_SEMANTICS)) + _RPAR)
416485
)("type")
417-
+ Optional(Word(alphanums+"_': -"))("constraint")
486+
+ Optional(Word("[]"))("array_brackets")
487+
+ Optional(Word(alphanums+"_': -."))("constraint")
418488
)("column")
419489
)
420490
)("columns")
@@ -483,7 +553,8 @@ def parse(self, ddl=None, source_database=None):
483553
# add column
484554
col = self._table.columns.append(
485555
column_name=ret_col["name"],
486-
data_type_array=ret_col["type"])
556+
data_type_array=ret_col["type"],
557+
array_brackets=ret_col['array_brackets'] if "array_brackets" in ret_col else None)
487558

488559
if "constraint" in ret_col:
489560
col.constraint = ret_col["constraint"]

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ def _test_requirements():
6262
'Programming Language :: Python :: 3.4',
6363
'Programming Language :: Python :: 3.5',
6464
'Programming Language :: Python :: 3.6',
65+
'Programming Language :: Python :: 3.7',
6566
'Topic :: Database',
6667
'Topic :: Software Development :: Libraries :: Python Modules',
6768
],

test-requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
pytest
1+
pytest>=3.6
22
pytest-cov
33
coveralls
44
codecov

0 commit comments

Comments (0)