Skip to content

Commit a0194df

Browse files
committed
Baked the clustered distribution example, cleaned up all the examples, and added logging to them
1 parent 58ba12e commit a0194df

17 files changed

+221
-91
lines changed

src/main/java/datasources/FlexibleRowDataSource.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@ public DataSourceReader createReader(DataSourceOptions options) {
4646
* resulting Dataset will have only a single partition -- that's why this DataSource
4747
* only provides sequential reads.
4848
*/
49-
class Reader implements DataSourceReader {
49+
static class Reader implements DataSourceReader {
50+
51+
static Logger log = Logger.getLogger(Reader.class.getName());
5052

5153
public Reader(String host, int port, String table) {
5254
_host = host;
@@ -77,6 +79,7 @@ public StructType readSchema() {
7779

7880
@Override
7981
public List<DataReaderFactory<Row>> createDataReaderFactories() {
82+
log.info("creating a single factory");
8083
return java.util.Collections.singletonList(
8184
new SimpleDataReaderFactory(_host, _port, _table, readSchema()));
8285
}
@@ -127,6 +130,8 @@ public void close() throws IOException {
127130
*/
128131
static class SimpleDataReaderFactory implements DataReaderFactory<Row> {
129132

133+
static Logger log = Logger.getLogger(SimpleDataReaderFactory.class.getName());
134+
130135
public SimpleDataReaderFactory(String host, int port,
131136
String table, StructType schema) {
132137
_host = host;

src/main/java/datasources/ParallelRowDataSource.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,9 @@ public DataSourceReader createReader(DataSourceOptions options) {
4848
* and how it obtains the reader factories to be used by the executors to create readers.
4949
* Notice that one factory is created for each partition.
5050
*/
51-
class Reader implements DataSourceReader {
51+
static class Reader implements DataSourceReader {
52+
53+
static Logger log = Logger.getLogger(Reader.class.getName());
5254

5355
public Reader(String host, int port, String table, int partitions) {
5456
_host = host;
@@ -100,6 +102,7 @@ public List<DataReaderFactory<Row>> createDataReaderFactories() {
100102
new SplitDataReaderFactory(_host, _port, _table, readSchema(), split);
101103
factories.add(factory);
102104
}
105+
log.info("created " + factories.size() + " factories");
103106
return factories;
104107
}
105108
}
@@ -149,6 +152,8 @@ public void close() throws IOException {
149152
*/
150153
static class SplitDataReaderFactory implements DataReaderFactory<Row> {
151154

155+
static Logger log = Logger.getLogger(SplitDataReaderFactory.class.getName());
156+
152157
public SplitDataReaderFactory(String host, int port,
153158
String table, StructType schema,
154159
Split split) {

src/main/java/datasources/PartitioningRowDataSource.java

Lines changed: 64 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -53,54 +53,67 @@ public DataSourceReader createReader(DataSourceOptions options) {
5353
* and how it obtains the reader factories to be used by the executors to create readers.
5454
* Notice that one factory is created for each partition.
5555
*/
56-
class Reader implements DataSourceReader, SupportsReportPartitioning {
56+
static class Reader implements DataSourceReader, SupportsReportPartitioning {
57+
58+
static Logger log = Logger.getLogger(Reader.class.getName());
5759

5860
public Reader(String host, int port, String table, int partitions) {
5961
_host = host;
6062
_port = port;
6163
_table = table;
62-
_partitions = partitions;
64+
_requestedPartitions = partitions;
6365
}
6466

65-
private StructType _schema;
6667
private String _host;
6768
private int _port;
6869
private String _table;
69-
private int _partitions;
70+
private int _requestedPartitions;
7071

71-
@Override
72-
public StructType readSchema() {
73-
if (_schema == null) {
72+
//
73+
// dynamic properties inferred from database
74+
//
75+
76+
private boolean _initialized = false;
77+
private StructType _schema;
78+
private String _clusteredColumn;
79+
private List<Split> _splits;
80+
81+
82+
private void initialize() {
83+
if (!_initialized) {
84+
log.info("initializing");
7485
DBClientWrapper db = new DBClientWrapper(_host, _port);
7586
db.connect();
7687
try {
7788
_schema = db.getSchema(_table);
89+
_clusteredColumn = db.getClusteredIndexColumn(_table);
90+
if (_requestedPartitions == 0)
91+
_splits = db.getSplits(_table);
92+
else
93+
_splits = db.getSplits(_table, _requestedPartitions);
7894
} catch (UnknownTableException ute) {
7995
throw new RuntimeException(ute);
8096
} finally {
8197
db.disconnect();
8298
}
99+
_initialized = true;
100+
log.info("initialized");
83101
}
102+
}
103+
104+
@Override
105+
public StructType readSchema() {
106+
log.info("schema requested for table [" + _table + "]");
107+
initialize();
84108
return _schema;
85109
}
86110

87111
@Override
88112
public List<DataReaderFactory<Row>> createDataReaderFactories() {
89-
List<Split> splits = null;
90-
DBClientWrapper db = new DBClientWrapper(_host, _port);
91-
db.connect();
92-
try {
93-
if (_partitions == 0)
94-
splits = db.getSplits(_table);
95-
else
96-
splits = db.getSplits(_table, _partitions);
97-
} catch (UnknownTableException ute) {
98-
throw new RuntimeException(ute);
99-
} finally {
100-
db.disconnect();
101-
}
113+
log.info("reader factories requested for table [" + _table + "]");
114+
initialize();
102115
List<DataReaderFactory<Row>> factories = new ArrayList<>();
103-
for (Split split : splits) {
116+
for (Split split : _splits) {
104117
DataReaderFactory<Row> factory =
105118
new SplitDataReaderFactory(_host, _port, _table, readSchema(), split);
106119
factories.add(factory);
@@ -110,37 +123,24 @@ public List<DataReaderFactory<Row>> createDataReaderFactories() {
110123

111124
@Override
112125
public Partitioning outputPartitioning() {
113-
return new TrivialPartitioning();
114-
}
115-
}
116-
117-
static class TrivialPartitioning implements Partitioning {
118-
119-
static Logger log = Logger.getLogger(TrivialPartitioning.class.getName());
120-
121-
@Override
122-
public int numPartitions() {
123-
log.info("asked for numPartitions");
124-
return 8;
125-
}
126-
127-
@Override
128-
public boolean satisfy(Distribution distribution) {
129-
log.info("asked to satisfy");
130-
// can't satisfy any Distribution
131-
return false;
126+
log.info("output partitioning requested for table [" + _table + "]");
127+
return new SingleClusteredColumnPartitioning(
128+
_clusteredColumn, _splits.size());
132129
}
133130
}
134131

135132
static class SingleClusteredColumnPartitioning implements Partitioning {
136133

134+
static Logger log = Logger.getLogger(SingleClusteredColumnPartitioning.class.getName());
135+
137136
public SingleClusteredColumnPartitioning(String columnName, int partitions) {
138137
_columnName = columnName;
139138
_partitions = partitions;
140139
}
141140

142141
@Override
143142
public int numPartitions() {
143+
log.info("asked for numPartitions");
144144
return _partitions;
145145
}
146146

@@ -150,11 +150,31 @@ public boolean satisfy(Distribution distribution) {
150150
// Since Spark may add other Distribution policies in the future, we can't assume
151151
// it's always a ClusteredDistribution
152152
//
153+
153154
if (distribution instanceof ClusteredDistribution) {
155+
154156
String[] clusteredCols = ((ClusteredDistribution) distribution).clusteredColumns;
155-
return Arrays.asList(clusteredCols).contains(_columnName);
157+
StringBuilder logEntryBuilder = new StringBuilder();
158+
logEntryBuilder.append("asked to satisfy ClusteredDistribution on columns ");
159+
if (clusteredCols.length > 0) {
160+
for (String col : clusteredCols) {
161+
logEntryBuilder.append("[");
162+
logEntryBuilder.append(col);
163+
logEntryBuilder.append("] ");
164+
}
165+
}
166+
log.info(logEntryBuilder.toString());
167+
if (_columnName == null) {
168+
log.info("no cluster column so does not satisfy");
169+
return false;
170+
} else {
171+
boolean satisfies = Arrays.asList(clusteredCols).contains(_columnName);
172+
log.info("based on cluster column: " + satisfies);
173+
return satisfies;
174+
}
156175
}
157-
176+
log.info("asked to satisfy unknown distribution of type [" +
177+
distribution.getClass().getCanonicalName() + "]");
158178
return false;
159179
}
160180

@@ -207,6 +227,8 @@ public void close() throws IOException {
207227
*/
208228
static class SplitDataReaderFactory implements DataReaderFactory<Row> {
209229

230+
static Logger log = Logger.getLogger(SplitDataReaderFactory.class.getName());
231+
210232
public SplitDataReaderFactory(String host, int port,
211233
String table, StructType schema,
212234
Split split) {

src/main/java/datasources/SimpleRowDataSource.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,9 @@ public DataSourceReader createReader(DataSourceOptions options) {
4747
* resulting Dataset will have only a single partition -- that's why this DataSource
4848
* only provides sequential reads.
4949
*/
50-
class Reader implements DataSourceReader {
50+
static class Reader implements DataSourceReader {
51+
52+
static Logger log = Logger.getLogger(Reader.class.getName());
5153

5254
public Reader(String host, int port) {
5355
_host = host;
@@ -65,6 +67,7 @@ public StructType readSchema() {
6567

6668
@Override
6769
public List<DataReaderFactory<Row>> createDataReaderFactories() {
70+
log.info("creating a single factory");
6871
return java.util.Arrays.asList(new SimpleDataReaderFactory(_host, _port));
6972
}
7073
}
@@ -115,6 +118,8 @@ public void close() throws IOException {
115118
*/
116119
static class SimpleDataReaderFactory implements DataReaderFactory<Row> {
117120

121+
static Logger log = Logger.getLogger(SimpleDataReaderFactory.class.getName());
122+
118123
public SimpleDataReaderFactory(String host, int port) {
119124
_host = host;
120125
_port = port;

src/main/java/datasources/utils/DBClientWrapper.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,20 @@ public StructType getSchema(String table) throws UnknownTableException
6060
fields.add(DataTypes.createStructField(name,
6161
DataTypes.DoubleType, true));
6262
break;
63+
case STRING:
64+
fields.add(DataTypes.createStructField(name,
65+
DataTypes.StringType, true));
66+
break;
6367
default:
6468
}
6569
}
6670
return DataTypes.createStructType(fields);
6771
}
6872

73+
public String getClusteredIndexColumn(String table) throws UnknownTableException {
74+
return _client.getTableClusteredIndexColumn(table);
75+
}
76+
6977
public List<Split> getSplits(String table, int count) throws UnknownTableException {
7078
return _client.getSplits(table, count);
7179
}

src/main/java/edb/client/DBClient.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,30 @@ public Schema getTableSchema(String name) throws UnknownTableException {
126126
}
127127
}
128128

129+
public String getTableClusteredIndexColumn(String tableName) throws UnknownTableException {
130+
GetTableClusteredIndexColumnRequest.Builder builder = GetTableClusteredIndexColumnRequest.newBuilder();
131+
builder.setTableName(tableName);
132+
133+
GetTableClusteredIndexColumnRequest request = builder.build();
134+
GetTableClusteredIndexColumnResponse response;
135+
try {
136+
response = _blockingStub.getTableClusteredIndexColumn(request);
137+
} catch (StatusRuntimeException e) {
138+
e.printStackTrace();
139+
throw e;
140+
}
141+
142+
if (response.getResult()) {
143+
if (response.hasColumnName()) {
144+
return response.getColumnName();
145+
} else {
146+
return null;
147+
}
148+
} else {
149+
throw new UnknownTableException(tableName);
150+
}
151+
}
152+
129153
public void bulkInsert(String name, List<Row> rows) throws UnknownTableException
130154
{
131155
BulkInsertRequest.Builder builder = BulkInsertRequest.newBuilder();

src/main/java/edb/common/IExampleDB.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ void createTable(String name, Schema schema, String clusterColumn)
1212

1313
Schema getTableSchema(String name) throws UnknownTableException;
1414

15+
String getTableClusteredIndexColumn(String name) throws UnknownTableException;
16+
1517
void bulkInsert(String name, List<Row> rows) throws UnknownTableException;
1618

1719
List<Row> getAllRows(String name) throws UnknownTableException;

src/main/java/edb/server/ClusteredIndexTable.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ public ClusteredIndexTable(String name, Schema schema, String indexColumn) {
2727

2828
public Schema getSchema() { return _schema; }
2929

30+
public String getIndexColumn() { return _indexColumn; }
31+
3032
public void addRows(List<Row> rows) {
3133
for (Row row : rows) {
3234
try {

src/main/java/edb/server/DBServer.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,31 @@ public void getTableSchema(GetTableSchemaRequest req,
125125
responseObserver.onCompleted();
126126
}
127127

128+
@Override
129+
public void getTableClusteredIndexColumn(
130+
GetTableClusteredIndexColumnRequest req,
131+
StreamObserver<GetTableClusteredIndexColumnResponse> responseObserver) {
132+
133+
String tableName = req.getTableName();
134+
135+
GetTableClusteredIndexColumnResponse.Builder builder =
136+
GetTableClusteredIndexColumnResponse.newBuilder();
137+
try {
138+
String columnName = _db.getTableClusteredIndexColumn(tableName);
139+
if (columnName != null) {
140+
builder.setColumnName(columnName);
141+
}
142+
builder.setResult(true);
143+
144+
} catch (UnknownTableException ete) {
145+
builder.setResult(false);
146+
}
147+
148+
GetTableClusteredIndexColumnResponse reply = builder.build();
149+
responseObserver.onNext(reply);
150+
responseObserver.onCompleted();
151+
}
152+
128153
@Override
129154
public void bulkInsert(BulkInsertRequest req,
130155
StreamObserver<BulkInsertResponse> responseObserver) {

src/main/java/edb/server/Database.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,20 @@ public Schema getTableSchema(String name) throws UnknownTableException {
4747
}
4848
}
4949

50+
public String getTableClusteredIndexColumn(String name) throws UnknownTableException {
51+
boolean present = _tables.containsKey(name);
52+
if (present) {
53+
ITable entry = _tables.get(name);
54+
if (entry instanceof ClusteredIndexTable) {
55+
return ((ClusteredIndexTable) entry).getIndexColumn();
56+
} else {
57+
return null;
58+
}
59+
} else {
60+
throw new UnknownTableException(name);
61+
}
62+
}
63+
5064
public void bulkInsert(String name, List<Row> rows) throws UnknownTableException {
5165
boolean present = _tables.containsKey(name);
5266
if (present) {

0 commit comments

Comments (0)