MYC-360 Parser fixes:

silvakid · silvakid · commit 22cccd7ea617 · 2016-09-28T11:20:39.000+01:00
"~x" – should be treated as bitwise negation, while "!x" is logical
negation
"a^22" – ERROR: Unknown character at 1
"c > cast(14.01 as decimal(3,2)) " – ERROR: CAST type invalid. !
"X'65'" literal
0x65 literal
diff --git a/cdk/CMakeLists.txt b/cdk/CMakeLists.txt
@@ -213,8 +213,8 @@ if(WITH_TESTS)
     extra/process_launcher/process_launcher.cc
     extra/common/exception.cc)
   target_include_directories(process_launcher PRIVATE
-    ${CMAKE_SOURCE_DIR}/extra/process_launcher/
-    ${CMAKE_SOURCE_DIR}/extra/common)
+    ${PROJECT_SOURCE_DIR}/extra/process_launcher/
+    ${PROJECT_SOURCE_DIR}/extra/common)
 
   # quiet compile warnings outside of our code
   if (MSVC)
diff --git a/cdk/include/mysql/cdk/protocol/mysqlx_expr.h b/cdk/include/mysql/cdk/protocol/mysqlx_expr.h
@@ -137,7 +137,16 @@ class Scalar_processor
   virtual void num(float) =0;
   virtual void num(double) =0;
   virtual void yesno(bool) =0;
-  virtual void octets(bytes) =0;
+
+  // See ``Mysqlx.Resultset.ColumnMetadata`` for the list of known values.
+  enum Octets_content_type
+  {
+    CT_PLAIN = 0x0000,       //   default value; general use of octets
+    CT_GEOMETRY = 0x0001,    //   BYTES  0x0001 GEOMETRY (WKB encoding)
+    CT_JSON = 0x0002,        //   BYTES  0x0002 JSON (text encoding)
+    CT_XML = 0x0003          //   BYTES  0x0003 XML (text encoding)
+  };
+  virtual void octets(bytes, Octets_content_type) =0;
 };
 
 
@@ -282,6 +291,8 @@ struct Safe_prc<protocol::mysqlx::api::Scalar_processor>
 
   using Base::m_prc;
 
+  typedef Processor::Octets_content_type Octets_content_type;
+
   void null() { return m_prc ? m_prc->null() : (void)NULL; }
 
   void str(bytes val)
@@ -305,8 +316,8 @@ struct Safe_prc<protocol::mysqlx::api::Scalar_processor>
   void yesno(bool val)
   { return m_prc ? m_prc->yesno(val) : (void)NULL; }
 
-  void octets(bytes data)
-  { return m_prc ? m_prc->octets(data) : (void)NULL; }
+  void octets(bytes data, Octets_content_type type)
+  { return m_prc ? m_prc->octets(data, type) : (void)NULL; }
 };
 
 
diff --git a/cdk/mysqlx/converters.h b/cdk/mysqlx/converters.h
@@ -26,6 +26,8 @@
 #define CDK_MYSQLX_CONVERTERS_H
 
 #include <mysql/cdk/converters.h>
+#include <mysql/cdk/protocol/mysqlx_expr.h>
+#include <mysql/cdk/foundation/codec.h>
 
 namespace cdk {
 namespace mysqlx {
@@ -73,7 +75,7 @@ struct Scalar_prc_converter
     m_proc->str(utf8);
   }
 
-  virtual void value(Type_info, const Format_info&, bytes data)
+  virtual void value(Type_info type, const Format_info &fi, bytes data)
   {
     /*
       TODO: Eventually we should Look at type/format info and do
@@ -85,7 +87,60 @@ struct Scalar_prc_converter
       form use the same encoding that is used by protocol and thus
       we can simply pass the raw bytes without any modifications.
     */
-    m_proc->octets(data);
+    switch (type)
+    {
+    case cdk::TYPE_INTEGER:
+      {
+        cdk::Codec<cdk::TYPE_INTEGER> codec(fi);
+
+        int64_t val;
+        codec.from_bytes(data, val);
+
+        m_proc->num(val);
+      }
+      break;
+    case cdk::TYPE_FLOAT:
+      {
+        cdk::Codec<cdk::TYPE_FLOAT> codec(fi);
+
+        double val;
+        codec.from_bytes(data, val);
+
+        m_proc->num(val);
+      }
+      break;
+    case cdk::TYPE_STRING:
+      {
+        cdk::Codec<cdk::TYPE_STRING> codec(fi);
+
+        string val;
+        codec.from_bytes(data, val);
+
+        m_proc->str(bytes(val));
+      }
+      break;
+    case cdk::TYPE_DATETIME:
+      {
+        //TODO: TYPE_DATETIME
+      }
+      break;
+    case cdk::TYPE_BYTES:
+      m_proc->octets(data,
+                     cdk::protocol::mysqlx::api::Scalar_processor::CT_PLAIN);
+      break;
+    case cdk::TYPE_DOCUMENT:
+      m_proc->octets(data,
+                     cdk::protocol::mysqlx::api::Scalar_processor::CT_JSON);
+      break;
+    case cdk::TYPE_GEOMETRY:
+      m_proc->octets(data, cdk::protocol::mysqlx::api::Scalar_processor::CT_GEOMETRY);
+      break;
+    case cdk::TYPE_XML:
+      m_proc->octets(data, cdk::protocol::mysqlx::api::Scalar_processor::CT_XML);
+      break;
+    }
+
+
   }
 
 };
diff --git a/cdk/parser/expr_parser.cc b/cdk/parser/expr_parser.cc
@@ -35,6 +35,8 @@ PUSH_BOOST_WARNINGS
 POP_BOOST_WARNINGS
 POP_SYS_WARNINGS
 
+#include <sstream>
+
 // TODO: Complete the parser
 // TODO: Better parser errors
 
@@ -189,6 +191,7 @@ void Expr_parser_base::parse_cast(Scalar_prc *prc)
 }
 
 
+
 /**
    castType ::=
         SIGNED INTEGER?
@@ -275,7 +278,7 @@ std::string Expr_parser_base::cast_data_type_dimension(bool double_dimension)
   if (double_dimension && cur_token_type_is(Token::COMMA))
   {
     consume_token(Token::COMMA);
-    result += ", " + consume_token(Token::LINTEGER);
+    result += "," + consume_token(Token::LINTEGER);
   }
   result += ")";
   consume_token(Token::RPAREN);
@@ -812,11 +815,17 @@ Expression* Expr_parser_base::parse_atomic(Processor *prc)
       }
 
     case Token::BANG:
+    get_token();
+    argsp = sprc->op(operator_name("!").c_str());
+    break;
     case Token::NOT:
+    get_token();
+    argsp = sprc->op(operator_name("not").c_str());
+    break;
     case Token::NEG:
-      get_token();
-      argsp = sprc->op(operator_name("not").c_str());
-      break;
+    get_token();
+    argsp = sprc->op(operator_name("~").c_str());
+    break;
 
     default:
       break;  // will continue with literal parsing
@@ -872,6 +881,25 @@ Expression* Expr_parser_base::parse_atomic(Processor *prc)
       return stored.release();
     }
     RETHROW_BOOST_LEXICAL;
+  case Token::LHEX:
+    try {
+      std::stringstream ss;
+      ss << get_token().get_text();
+      if (neg)
+      {
+        int64_t val;
+        ss >> std::hex >> val;
+        sprc->val()->num(-val);
+      }
+      else
+      {
+        uint64_t val;
+        ss >> std::hex >> val;
+        sprc->val()->num(val);
+      }
+      return stored.release();
+    }
+    RETHROW_BOOST_LEXICAL;
 
     case Token::TRUE_:
     case Token::FALSE_:
diff --git a/cdk/parser/expr_parser.h b/cdk/parser/expr_parser.h
@@ -547,6 +547,8 @@ class Expr_parser_base
   std::string cast_data_type_dimension(bool double_dimension = false);
   std::string opt_binary();
 
+  void parse_char(Scalar_prc*);
+
   void parse_doc(Processor::Doc_prc*);
   void parse_arr(Processor::List_prc*);
 
@@ -1150,7 +1152,6 @@ struct Stored_scalar
 
 };
 
-
 // --------------------------------------------------------------------------
 
 
diff --git a/cdk/parser/tests/parser-t.cc b/cdk/parser/tests/parser-t.cc
@@ -576,6 +576,23 @@ const Expr_Test exprs[] =
   { parser::Parser_mode::DOCUMENT, L"name LIKE :name AND age > :age" },
   { parser::Parser_mode::TABLE   , L"`date`->$.year"},
   { parser::Parser_mode::DOCUMENT, L"count(*)" },
+  { parser::Parser_mode::TABLE   , L"~x"},
+  { parser::Parser_mode::TABLE   , L"a^22"},
+  { parser::Parser_mode::TABLE   , L"a^~22"},
+  { parser::Parser_mode::TABLE   , L" a >cast(11 as signed Int)"},
+  { parser::Parser_mode::TABLE   , L"c > cast(14.01 as decimal(3,2))"},
+  { parser::Parser_mode::TABLE   , L"CHARSET(CHAR(X'65'))"},
+  { parser::Parser_mode::TABLE   , L"CHARSET(CHAR(0x65))"},
+  { parser::Parser_mode::TABLE   , L"CHARSET(CHAR(X'65' USING utf8))"},
+//  { parser::Parser_mode::TABLE   , L"TRIM(BOTH 'x' FROM 'xxxbarxxx')"},
+//  { parser::Parser_mode::TABLE   , L"TRIM(LEADING 'x' FROM 'xxxbarxxx')"},
+//  { parser::Parser_mode::TABLE   , L"TRIM(TRAILING 'xyz' FROM 'barxxyz')"},
+  { parser::Parser_mode::TABLE   , L"'abc' NOT LIKE 'ABC1'"},
+//  { parser::Parser_mode::TABLE   , L"'a' RLIKE '^[a-d]'"},
+  { parser::Parser_mode::TABLE   , L"'a' REGEXP '^[a-d]'"},
+//  { parser::Parser_mode::TABLE   , L"POSITION('bar' IN 'foobarbar')"},
+//  { parser::Parser_mode::TABLE   , L"'Heoko' SOUNDS LIKE 'h1aso'"}
+
 };
 
 
@@ -913,8 +930,8 @@ std::ostream& operator<<(std::ostream &out, URI_parts &data)
     cout << " user: " << data.user << endl;
   if (data.pwd)
     cout << "  pwd: " << data.pwd << endl;
-    cout << " host: " << data.host << endl;
-    cout << " port: " << data.port << endl;
+  cout << " host: " << data.host << endl;
+  cout << " port: " << data.port << endl;
   if (data.path)
     cout << " path: " << data.path << endl;
   if (data.has_query)
diff --git a/cdk/parser/tokenizer.cc b/cdk/parser/tokenizer.cc
@@ -132,6 +132,7 @@ Tokenizer::Maps::Maps()
   operator_names["<"] = "<";
   operator_names["<="] = "<=";
   operator_names["&"] = "&";
+  operator_names["^"] = "^";
   operator_names["|"] = "|";
   operator_names["<<"] = "<<";
   operator_names[">>"] = ">>";
@@ -311,6 +312,64 @@ bool Tokenizer::parse_float_expo(size_t& i)
   return true;
 }
 
+/*
+  Try to read a hexadecimal literal that starts at position i:
+
+  X'12ab'   x'12ab'   0x12ab
+
+  On success a Token::LHEX holding only the digits (no prefix or quotes)
+  is appended to _tokens, i is moved to the last character of the literal
+  (the caller's loop increment steps past it) and true is returned.
+  Otherwise i is left untouched and false is returned; this includes an
+  unterminated X'... and a bare "0x" with no digits.
+*/
+
+bool Tokenizer::parse_hex(size_t& i)
+{
+  size_t pos = i;  // scan with a copy so that i only changes on success
+  std::string val;
+  bool has_value = false;
+
+  if ((_input[pos] == 'X' || _input[pos] == 'x') && next_char_is(pos, '\''))
+  {
+    const size_t start = pos + 2;  // first character after the quote
+
+    for (pos = start; pos < _input.size(); ++pos)
+    {
+      if (_input[pos] == '\'')
+      {
+        // The third argument of assign() is a length, not an end offset.
+        val.assign(_input, start, pos - start);
+        has_value = true;
+        break;  // pos stays on the closing quote
+      }
+    }
+  }
+  else if (_input[pos] == '0' && (next_char_is(pos, 'x') || next_char_is(pos, 'X')))
+  {
+    const size_t start = pos + 2;  // first character after "0x"
+
+    for (pos = start; pos < _input.size(); ++pos)
+      if (!std::isxdigit(static_cast<unsigned char>(_input[pos])))
+        break;
+
+    // A bare "0x" without any digit is not a literal.
+    if (pos > start)
+    {
+      val.assign(_input, start, pos - start);
+      --pos;  // step back onto the last digit
+      has_value = true;
+    }
+  }
+
+  if (!has_value)
+    return false;
+
+  _tokens.push_back(Token(Token::LHEX, val));
+  i = pos;
+  return true;
+}
+
 void Tokenizer::get_tokens()
 {
   for (size_t i = 0; i < _input.size(); ++i)
@@ -326,6 +385,9 @@ void Tokenizer::get_tokens()
       continue;
     }
 
+    if ( parse_hex(i))
+      continue;
+
     Token::TokenType tt = Token::T_NULL;
     size_t j=i;
     if (Token::T_NULL != (tt = parse_number(j)))
@@ -392,6 +454,10 @@ void Tokenizer::get_tokens()
       {
         _tokens.push_back(Token(Token::BITOR, std::string(1, c)));
       }
+      else if (c == '^')
+      {
+        _tokens.push_back(Token(Token::BITXOR, std::string(1, c)));
+      }
       else if (c == '(')
       {
         _tokens.push_back(Token(Token::LPAREN, std::string(1, c)));
diff --git a/cdk/parser/tokenizer.h b/cdk/parser/tokenizer.h
@@ -55,6 +55,7 @@ namespace parser {
     X(FALSE_) \
     X(IN_ ) \
     X(LIKE) \
+    X(RLIKE) \
     X(INTERVAL) \
     X(REGEXP) \
     X(ESCAPE) \
@@ -97,6 +98,7 @@ namespace parser {
     X(DOUBLESTAR) \
     X(MOD) \
     X(AS) \
+    X(USING) \
     X(ASC) \
     X(DESC) \
     X(CAST) \
@@ -117,6 +119,7 @@ namespace parser {
     X(UNSIGNED) \
     X(INTEGER) /* 'integer' keyword */ \
     X(LINTEGER) /* integer number */ \
+    X(LHEX) /* hexadecimal number*/\
     X(DOLLAR) \
     X(JSON) \
     X(COLON) \
@@ -202,6 +205,7 @@ namespace parser {
 
     Token::TokenType parse_number(size_t& i);
     bool parse_float_expo(size_t& i);
+    bool parse_hex(size_t& i);
 
   public:
 
diff --git a/cdk/protocol/mysqlx/builders.h b/cdk/protocol/mysqlx/builders.h
diff --git a/devapi/tests/first-t.cc b/devapi/tests/first-t.cc