Skip to content

Commit 22cccd7

Browse files
committed
MYC-360 Parser fixes:
"~x" – should be treated as bitwise negation, while "!x" is logical negation; "a^22" – ERROR: Unknown character at 1; "c > cast(14.01 as decimal(3,2)) " – ERROR: CAST type invalid; "X'65'" literal; 0x65 literal.
1 parent 5111798 commit 22cccd7

File tree

10 files changed

+308
-17
lines changed

10 files changed

+308
-17
lines changed

cdk/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,8 @@ if(WITH_TESTS)
213213
extra/process_launcher/process_launcher.cc
214214
extra/common/exception.cc)
215215
target_include_directories(process_launcher PRIVATE
216-
${CMAKE_SOURCE_DIR}/extra/process_launcher/
217-
${CMAKE_SOURCE_DIR}/extra/common)
216+
${PROJECT_SOURCE_DIR}/extra/process_launcher/
217+
${PROJECT_SOURCE_DIR}/extra/common)
218218

219219
# quiet compile warnings outside of our code
220220
if (MSVC)

cdk/include/mysql/cdk/protocol/mysqlx_expr.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,16 @@ class Scalar_processor
137137
virtual void num(float) =0;
138138
virtual void num(double) =0;
139139
virtual void yesno(bool) =0;
140-
virtual void octets(bytes) =0;
140+
141+
// Content type tag that accompanies the raw bytes passed to octets().
// Source: see ``Mysqlx.Resultset.ColumnMetadata`` for the list of known values.
142+
enum Octets_content_type
143+
{
144+
CT_PLAIN = 0x0000, // default value; general use of octets
145+
CT_GEOMETRY = 0x0001, // BYTES 0x0001 GEOMETRY (WKB encoding)
146+
CT_JSON = 0x0002, // BYTES 0x0002 JSON (text encoding)
147+
CT_XML = 0x0003 // BYTES 0x0003 XML (text encoding)
148+
};
149+
virtual void octets(bytes, Octets_content_type) =0;
141150
};
142151

143152

@@ -282,6 +291,8 @@ struct Safe_prc<protocol::mysqlx::api::Scalar_processor>
282291

283292
using Base::m_prc;
284293

294+
typedef Processor::Octets_content_type Octets_content_type;
295+
285296
void null() { return m_prc ? m_prc->null() : (void)NULL; }
286297

287298
void str(bytes val)
@@ -305,8 +316,8 @@ struct Safe_prc<protocol::mysqlx::api::Scalar_processor>
305316
void yesno(bool val)
306317
{ return m_prc ? m_prc->yesno(val) : (void)NULL; }
307318

308-
void octets(bytes data)
309-
{ return m_prc ? m_prc->octets(data) : (void)NULL; }
319+
void octets(bytes data, Octets_content_type type)
320+
{ return m_prc ? m_prc->octets(data, type) : (void)NULL; }
310321
};
311322

312323

cdk/mysqlx/converters.h

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#define CDK_MYSQLX_CONVERTERS_H
2727

2828
#include <mysql/cdk/converters.h>
29+
#include <mysql/cdk/protocol/mysqlx_expr.h>
30+
#include <mysql/cdk/foundation/codec.h>
2931

3032
namespace cdk {
3133
namespace mysqlx {
@@ -73,7 +75,7 @@ struct Scalar_prc_converter
7375
m_proc->str(utf8);
7476
}
7577

76-
virtual void value(Type_info, const Format_info&, bytes data)
78+
virtual void value(Type_info type, const Format_info &fi, bytes data)
7779
{
7880
/*
7981
TODO: Eventually we should Look at type/format info and do
@@ -85,7 +87,60 @@ struct Scalar_prc_converter
8587
form use the same encoding that is used by protocol and thus
8688
we can simply pass the raw bytes without any modifications.
8789
*/
88-
m_proc->octets(data);
90+
switch (type)
91+
{
92+
case cdk::TYPE_INTEGER:
93+
{
94+
cdk::Codec<cdk::TYPE_INTEGER> codec(fi);
95+
96+
int64_t val;
97+
codec.from_bytes(data, val);
98+
99+
m_proc->num(val);
100+
}
101+
break;
102+
case cdk::TYPE_FLOAT:
103+
{
104+
cdk::Codec<cdk::TYPE_FLOAT> codec(fi);
105+
106+
double val;
107+
codec.from_bytes(data, val);
108+
109+
m_proc->num(val);
110+
}
111+
break;
112+
case cdk::TYPE_STRING:
113+
{
114+
cdk::Codec<cdk::TYPE_STRING> codec(fi);
115+
116+
string val;
117+
codec.from_bytes(data, val);
118+
119+
m_proc->str(bytes(val));
120+
}
121+
break;
122+
case cdk::TYPE_DATETIME:
123+
{
124+
//TODO: TYPE_DATETIME
125+
}
126+
break;
127+
case cdk::TYPE_BYTES:
128+
m_proc->octets(data,
129+
cdk::protocol::mysqlx::api::Scalar_processor::CT_PLAIN);
130+
break;
131+
case cdk::TYPE_DOCUMENT:
132+
m_proc->octets(data,
133+
cdk::protocol::mysqlx::api::Scalar_processor::CT_JSON);
134+
break;
135+
case cdk::TYPE_GEOMETRY:
136+
m_proc->octets(data, cdk::protocol::mysqlx::api::Scalar_processor::CT_GEOMETRY);
137+
break;
138+
case cdk::TYPE_XML:
139+
m_proc->octets(data, cdk::protocol::mysqlx::api::Scalar_processor::CT_XML);
140+
break;
141+
}
142+
143+
89144
}
90145

91146
};

cdk/parser/expr_parser.cc

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ PUSH_BOOST_WARNINGS
3535
POP_BOOST_WARNINGS
3636
POP_SYS_WARNINGS
3737

38+
#include <sstream>
39+
3840
// TODO: Complete the parser
3941
// TODO: Better parser errors
4042

@@ -189,6 +191,7 @@ void Expr_parser_base::parse_cast(Scalar_prc *prc)
189191
}
190192

191193

194+
192195
/**
193196
castType ::=
194197
SIGNED INTEGER?
@@ -275,7 +278,7 @@ std::string Expr_parser_base::cast_data_type_dimension(bool double_dimension)
275278
if (double_dimension && cur_token_type_is(Token::COMMA))
276279
{
277280
consume_token(Token::COMMA);
278-
result += ", " + consume_token(Token::LINTEGER);
281+
result += "," + consume_token(Token::LINTEGER);
279282
}
280283
result += ")";
281284
consume_token(Token::RPAREN);
@@ -812,11 +815,17 @@ Expression* Expr_parser_base::parse_atomic(Processor *prc)
812815
}
813816

814817
case Token::BANG:
818+
get_token();
819+
argsp = sprc->op(operator_name("!").c_str());
820+
break;
815821
case Token::NOT:
822+
get_token();
823+
argsp = sprc->op(operator_name("not").c_str());
824+
break;
816825
case Token::NEG:
817-
get_token();
818-
argsp = sprc->op(operator_name("not").c_str());
819-
break;
826+
get_token();
827+
argsp = sprc->op(operator_name("~").c_str());
828+
break;
820829

821830
default:
822831
break; // will continue with literal parsing
@@ -872,6 +881,25 @@ Expression* Expr_parser_base::parse_atomic(Processor *prc)
872881
return stored.release();
873882
}
874883
RETHROW_BOOST_LEXICAL;
884+
case Token::LHEX:
885+
try {
886+
std::stringstream ss;
887+
ss << get_token().get_text();
888+
if (neg)
889+
{
890+
int64_t val;
891+
ss >> std::hex >> val;
892+
sprc->val()->num(-val);
893+
}
894+
else
895+
{
896+
uint64_t val;
897+
ss >> std::hex >> val;
898+
sprc->val()->num(val);
899+
}
900+
return stored.release();
901+
}
902+
RETHROW_BOOST_LEXICAL;
875903

876904
case Token::TRUE_:
877905
case Token::FALSE_:

cdk/parser/expr_parser.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,8 @@ class Expr_parser_base
547547
std::string cast_data_type_dimension(bool double_dimension = false);
548548
std::string opt_binary();
549549

550+
void parse_char(Scalar_prc*);
551+
550552
void parse_doc(Processor::Doc_prc*);
551553
void parse_arr(Processor::List_prc*);
552554

@@ -1150,7 +1152,6 @@ struct Stored_scalar
11501152

11511153
};
11521154

1153-
11541155
// --------------------------------------------------------------------------
11551156

11561157

cdk/parser/tests/parser-t.cc

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,23 @@ const Expr_Test exprs[] =
576576
{ parser::Parser_mode::DOCUMENT, L"name LIKE :name AND age > :age" },
577577
{ parser::Parser_mode::TABLE , L"`date`->$.year"},
578578
{ parser::Parser_mode::DOCUMENT, L"count(*)" },
579+
{ parser::Parser_mode::TABLE , L"~x"},
580+
{ parser::Parser_mode::TABLE , L"a^22"},
581+
{ parser::Parser_mode::TABLE , L"a^~22"},
582+
{ parser::Parser_mode::TABLE , L" a >cast(11 as signed Int)"},
583+
{ parser::Parser_mode::TABLE , L"c > cast(14.01 as decimal(3,2))"},
584+
{ parser::Parser_mode::TABLE , L"CHARSET(CHAR(X'65'))"},
585+
{ parser::Parser_mode::TABLE , L"CHARSET(CHAR(0x65))"},
586+
{ parser::Parser_mode::TABLE , L"CHARSET(CHAR(X'65' USING utf8))"},
587+
// { parser::Parser_mode::TABLE , L"TRIM(BOTH 'x' FROM 'xxxbarxxx')"},
588+
// { parser::Parser_mode::TABLE , L"TRIM(LEADING 'x' FROM 'xxxbarxxx')"},
589+
// { parser::Parser_mode::TABLE , L"TRIM(TRAILING 'xyz' FROM 'barxxyz')"},
590+
{ parser::Parser_mode::TABLE , L"'abc' NOT LIKE 'ABC1'"},
591+
// { parser::Parser_mode::TABLE , L"'a' RLIKE '^[a-d]'"},
592+
{ parser::Parser_mode::TABLE , L"'a' REGEXP '^[a-d]'"},
593+
// { parser::Parser_mode::TABLE , L"POSITION('bar' IN 'foobarbar')"},
594+
// { parser::Parser_mode::TABLE , L"'Heoko' SOUNDS LIKE 'h1aso'"}
595+
579596
};
580597

581598

@@ -913,8 +930,8 @@ std::ostream& operator<<(std::ostream &out, URI_parts &data)
913930
cout << " user: " << data.user << endl;
914931
if (data.pwd)
915932
cout << " pwd: " << data.pwd << endl;
916-
cout << " host: " << data.host << endl;
917-
cout << " port: " << data.port << endl;
933+
cout << " host: " << data.host << endl;
934+
cout << " port: " << data.port << endl;
918935
if (data.path)
919936
cout << " path: " << data.path << endl;
920937
if (data.has_query)

cdk/parser/tokenizer.cc

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ Tokenizer::Maps::Maps()
132132
operator_names["<"] = "<";
133133
operator_names["<="] = "<=";
134134
operator_names["&"] = "&";
135+
operator_names["^"] = "^";
135136
operator_names["|"] = "|";
136137
operator_names["<<"] = "<<";
137138
operator_names[">>"] = ">>";
@@ -311,6 +312,64 @@ bool Tokenizer::parse_float_expo(size_t& i)
311312
return true;
312313
}
313314

315+
/*
316+
Check if we have a Hexadecimal literal:
317+
318+
X'12ab'
319+
x'12ab'
320+
ox12ab
321+
*/
322+
323+
bool Tokenizer::parse_hex(size_t& i)
324+
{
325+
std::string val;
326+
bool has_value = false;
327+
int start = i;
328+
if((_input[i] == 'X' || _input[i] == 'x') && next_char_is(i, '\''))
329+
{
330+
i+=2;
331+
332+
start = i;
333+
334+
for (; i < _input.size();++i)
335+
{
336+
if (_input[i] == '\'')
337+
{
338+
// We don't want the 'either (so the -2)
339+
val.assign(_input, start, i-2);
340+
341+
has_value = true;
342+
break;
343+
}
344+
}
345+
}
346+
else if (_input[i] == '0' && (next_char_is(i, 'x') || next_char_is(i, 'X')))
347+
{
348+
i+=2;
349+
350+
int start = i;
351+
352+
for (; i < _input.size() && std::isalnum(_input[i]);++i)
353+
{}
354+
355+
--i;
356+
357+
val.assign(_input, start, i-1);
358+
359+
has_value = true;
360+
361+
}
362+
363+
if (has_value)
364+
{
365+
_tokens.push_back(Token(Token::LHEX, val));
366+
367+
return true;
368+
}
369+
370+
return false;
371+
}
372+
314373
void Tokenizer::get_tokens()
315374
{
316375
for (size_t i = 0; i < _input.size(); ++i)
@@ -326,6 +385,9 @@ void Tokenizer::get_tokens()
326385
continue;
327386
}
328387

388+
if ( parse_hex(i))
389+
continue;
390+
329391
Token::TokenType tt = Token::T_NULL;
330392
size_t j=i;
331393
if (Token::T_NULL != (tt = parse_number(j)))
@@ -392,6 +454,10 @@ void Tokenizer::get_tokens()
392454
{
393455
_tokens.push_back(Token(Token::BITOR, std::string(1, c)));
394456
}
457+
else if (c == '^')
458+
{
459+
_tokens.push_back(Token(Token::BITXOR, std::string(1, c)));
460+
}
395461
else if (c == '(')
396462
{
397463
_tokens.push_back(Token(Token::LPAREN, std::string(1, c)));

cdk/parser/tokenizer.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ namespace parser {
5555
X(FALSE_) \
5656
X(IN_ ) \
5757
X(LIKE) \
58+
X(RLIKE) \
5859
X(INTERVAL) \
5960
X(REGEXP) \
6061
X(ESCAPE) \
@@ -97,6 +98,7 @@ namespace parser {
9798
X(DOUBLESTAR) \
9899
X(MOD) \
99100
X(AS) \
101+
X(USING) \
100102
X(ASC) \
101103
X(DESC) \
102104
X(CAST) \
@@ -117,6 +119,7 @@ namespace parser {
117119
X(UNSIGNED) \
118120
X(INTEGER) /* 'integer' keyword */ \
119121
X(LINTEGER) /* integer number */ \
122+
X(LHEX) /* hexadecimal number*/\
120123
X(DOLLAR) \
121124
X(JSON) \
122125
X(COLON) \
@@ -202,6 +205,7 @@ namespace parser {
202205

203206
Token::TokenType parse_number(size_t& i);
204207
bool parse_float_expo(size_t& i);
208+
bool parse_hex(size_t& i);
205209

206210
public:
207211

0 commit comments

Comments
 (0)