Skip to content

Commit 2710988

Browse files
Qin Xuye authored and wjsi committed
Fix bug that min_val of index_value or columns could be nan for chunks of DataFrame (mars-project#405)
* Fix bug that nan could exist in the min_val and max_val of index_value or columns for DataFrame
* add initializer for DataFrame
* a bunch of renaming from tensor to tileable
1 parent f43d102 commit 2710988

File tree

14 files changed

+256
-167
lines changed

14 files changed

+256
-167
lines changed

mars/api.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -94,36 +94,36 @@ def get_graph_state(self, session_id, graph_key):
9494
state = GraphState(state.lower())
9595
return state
9696

97-
def fetch_data(self, session_id, graph_key, tensor_key, compressions=None, wait=True):
97+
def fetch_data(self, session_id, graph_key, tileable_key, compressions=None, wait=True):
9898
graph_uid = GraphActor.gen_uid(session_id, graph_key)
9999
graph_address = self.cluster_info.get_scheduler(graph_uid)
100100
result_ref = self.actor_client.actor_ref(ResultReceiverActor.default_name(), address=graph_address)
101101

102102
compressions = set(compressions or []) | {dataserializer.COMPRESS_FLAG_NONE}
103-
return result_ref.fetch_tileable(session_id, graph_key, tensor_key, compressions, _wait=wait)
103+
return result_ref.fetch_tileable(session_id, graph_key, tileable_key, compressions, _wait=wait)
104104

105-
def delete_data(self, session_id, graph_key, tensor_key):
105+
def delete_data(self, session_id, graph_key, tileable_key):
106106
graph_uid = GraphActor.gen_uid(session_id, graph_key)
107107
graph_ref = self.get_actor_ref(graph_uid)
108-
graph_ref.free_tileable_data(tensor_key, _tell=True)
108+
graph_ref.free_tileable_data(tileable_key, _tell=True)
109109

110-
def get_tensor_nsplits(self, session_id, graph_key, tensor_key):
110+
def get_tileable_nsplits(self, session_id, graph_key, tileable_key):
111111
# nsplits is essential for operator like `reshape` and shape can be calculated by nsplits
112112
graph_uid = GraphActor.gen_uid(session_id, graph_key)
113113
graph_ref = self.get_actor_ref(graph_uid)
114-
chunk_indexes = graph_ref.get_tileable_chunk_indexes(tensor_key)
114+
chunk_indexes = graph_ref.get_tileable_chunk_indexes(tileable_key)
115115

116116
chunk_meta_ref = self.get_actor_ref(ChunkMetaActor.default_name())
117117
chunk_shapes = chunk_meta_ref.batch_get_chunk_shape(session_id, list(chunk_indexes.keys()))
118118

119119
# for each dimension, record chunk shape whose index is zero on other dimensions
120120
ndim = len(chunk_shapes[0])
121-
tensor_nsplits = []
121+
tileable_nsplits = []
122122
for i in range(ndim):
123123
splits = []
124124
for index, shape in zip(chunk_indexes.values(), chunk_shapes):
125125
if all(idx == 0 for j, idx in enumerate(index) if j != i):
126126
splits.append(shape[i])
127-
tensor_nsplits.append(tuple(splits))
127+
tileable_nsplits.append(tuple(splits))
128128

129-
return tuple(tensor_nsplits)
129+
return tuple(tileable_nsplits)

mars/dataframe/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616

17+
from .initializer import DataFrame
1718
# do imports to register operands
1819
from . import expressions
1920
del expressions

mars/dataframe/execution/tests/test_arithmetic_execution.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,3 +207,21 @@ def testAddWithShuffleAndWithOneChunk(self):
207207
result = self.executor.execute_dataframe(df3, concat=True, compose=False)[0]
208208

209209
pd.testing.assert_frame_equal(expected, result)
210+
211+
def testAddWithAdded(self):
    # Regression test: adding a third operand on top of an already-added
    # result must still align chunks correctly (per the commit message,
    # min_val/max_val of chunk index_value could previously become nan).
    raw1 = pd.DataFrame(np.random.rand(10, 10))
    raw2 = pd.DataFrame(np.random.rand(10, 10))
    raw3 = pd.DataFrame(np.random.rand(10, 10))

    # Deliberately mismatched chunk sizes force an alignment/shuffle path.
    mdf1 = from_pandas(raw1, chunk_size=5)
    mdf2 = from_pandas(raw2, chunk_size=6)
    mdf3 = from_pandas(raw3, chunk_size=6)

    summed = add(add(mdf1, mdf2), mdf3)

    result = self.executor.execute_dataframe(summed, concat=True, compose=False)[0]
    expected = raw1 + raw2 + raw3

    pd.testing.assert_frame_equal(expected, result)

mars/dataframe/expressions/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,4 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
1615
from .utils import concat_tileable_chunks, get_fetch_op_cls

mars/dataframe/expressions/arithmetic/core.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,7 @@ def _get_chunk_index_min_max(cls, df, index_type, axis):
339339

340340
@classmethod
341341
def _need_align_map(cls, input_chunk, index_min_max, column_min_max):
342+
assert not np.isnan(index_min_max[0]) and not np.isnan(index_min_max[2])
342343
if input_chunk.index_value is None or input_chunk.columns is None:
343344
return True
344345
if input_chunk.index_value.min_max != index_min_max:

mars/dataframe/expressions/utils.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@
2626
from ..core import IndexValue
2727

2828

29+
def is_pd_range_empty(pd_range_index):
    """Return True when *pd_range_index* (a pandas RangeIndex) holds no elements.

    NOTE(review): relies on the private ``_start``/``_stop``/``_step``
    attributes of RangeIndex — confirm these exist on the pandas version in use.
    """
    start = pd_range_index._start
    stop = pd_range_index._stop
    step = pd_range_index._step
    # A forward-stepping range is empty once start has reached stop;
    # a backward-stepping range is empty once start is at or below stop.
    if step >= 0:
        return start >= stop
    return start <= stop
32+
33+
2934
def decide_chunk_sizes(shape, chunk_size, memory_usage):
3035
"""
3136
Decide how a given DataFrame can be split into chunk.
@@ -115,7 +120,19 @@ def _serialize_index(index):
115120
return getattr(IndexValue, type(index).__name__)(_name=index.name, **properties)
116121

117122
def _serialize_range_index(index):
118-
properties = _extract_property(index, False)
123+
if is_pd_range_empty(index):
124+
properties = {
125+
'_is_monotonic_increasing': True,
126+
'_is_monotonic_decreasing': False,
127+
'_is_unique': True,
128+
'_min_val': index._start,
129+
'_max_val': index._stop,
130+
'_min_val_close': True,
131+
'_max_val_close': False,
132+
'_key': key or tokenize(index),
133+
}
134+
else:
135+
properties = _extract_property(index, False)
119136
return IndexValue.RangeIndex(_slice=slice(index._start, index._stop, index._step),
120137
_name=index.name, **properties)
121138

@@ -282,6 +299,9 @@ def build_empty_df(dtypes):
282299

283300

284301
def _filter_range_index(pd_range_index, min_val, min_val_close, max_val, max_val_close):
302+
if is_pd_range_empty(pd_range_index):
303+
return pd_range_index
304+
285305
raw_min, raw_max, step = pd_range_index.min(), pd_range_index.max(), pd_range_index._step
286306

287307
# seek min range

mars/dataframe/initializer.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Copyright 1999-2018 Alibaba Group Holding Ltd.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
try:
    import pandas as pd
except ImportError:  # pragma: no cover
    pass

from ..tensor.core import TENSOR_TYPE
from .core import DATAFRAME_TYPE, DataFrame as _Frame
from .expressions.datasource.dataframe import from_pandas


class DataFrame(_Frame):
    """pandas-style DataFrame initializer backed by a mars datasource expression.

    Accepts the same construction arguments as ``pandas.DataFrame`` plus the
    mars-specific ``chunk_size``, ``gpu`` and ``sparse`` options.
    """

    def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False,
                 chunk_size=None, gpu=None, sparse=None):
        # Construction from a mars tensor or another mars DataFrame is not
        # implemented yet; reject those inputs explicitly.
        if isinstance(data, TENSOR_TYPE):
            raise NotImplementedError('Not support create DataFrame from tensor')
        if isinstance(data, DATAFRAME_TYPE):
            raise NotImplementedError('Not support yet')

        # Materialize an in-memory pandas DataFrame first, wrap it as a
        # mars datasource expression, then adopt the resulting data object.
        pdf = pd.DataFrame(data, index=index, columns=columns, dtype=dtype, copy=copy)
        df = from_pandas(pdf, chunk_size=chunk_size, gpu=gpu, sparse=sparse)
        super(DataFrame, self).__init__(df.data)

mars/deploy/local/session.py

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ class LocalClusterSession(object):
3030
def __init__(self, endpoint, **kwargs):
3131
self._session_id = uuid.uuid4()
3232
self._endpoint = endpoint
33-
# dict structure: {tensor_key -> graph_key, tensor_ids}
34-
# dict value is a tuple object which records graph key and tensor id
35-
self._executed_tensors = dict()
33+
# dict structure: {tileable_key -> graph_key, tileable_ids}
34+
# dict value is a tuple object which records graph key and tilable id
35+
self._executed_tileables = dict()
3636
self._api = MarsAPI(self._endpoint)
3737

3838
# create session on the cluster side
@@ -51,35 +51,35 @@ def endpoint(self, endpoint):
5151
self._endpoint = endpoint
5252
self._api = MarsAPI(self._endpoint)
5353

54-
def _get_tensor_graph_key(self, tensor_key):
55-
return self._executed_tensors[tensor_key][0]
54+
def _get_tileable_graph_key(self, tileable_key):
55+
return self._executed_tileables[tileable_key][0]
5656

57-
def _set_tensor_graph_key(self, tensor, graph_key):
58-
tensor_key = tensor.key
59-
tensor_id = tensor.id
60-
if tensor_key in self._executed_tensors:
61-
self._executed_tensors[tensor_key][1].add(tensor_id)
57+
def _set_tileable_graph_key(self, tileable, graph_key):
58+
tileable_key = tileable.key
59+
tileable_id = tileable.id
60+
if tileable_key in self._executed_tileables:
61+
self._executed_tileables[tileable_key][1].add(tileable_id)
6262
else:
63-
self._executed_tensors[tensor_key] = graph_key, {tensor_id}
63+
self._executed_tileables[tileable_key] = graph_key, {tileable_id}
6464

65-
def _update_tensor_shape(self, tensor):
66-
graph_key = self._get_tensor_graph_key(tensor.key)
67-
new_nsplits = self._api.get_tensor_nsplits(self._session_id, graph_key, tensor.key)
68-
tensor._update_shape(tuple(sum(nsplit) for nsplit in new_nsplits))
69-
tensor.nsplits = new_nsplits
65+
def _update_tileable_shape(self, tileable):
66+
graph_key = self._get_tileable_graph_key(tileable.key)
67+
new_nsplits = self._api.get_tileable_nsplits(self._session_id, graph_key, tileable.key)
68+
tileable._update_shape(tuple(sum(nsplit) for nsplit in new_nsplits))
69+
tileable.nsplits = new_nsplits
7070

71-
def run(self, *tensors, **kw):
71+
def run(self, *tileables, **kw):
7272
timeout = kw.pop('timeout', -1)
7373
fetch = kw.pop('fetch', True)
7474
compose = kw.pop('compose', True)
7575
if kw:
7676
raise TypeError('run got unexpected key arguments {0}'.format(', '.join(kw.keys())))
7777

78-
# those executed tensors should fetch data directly, submit the others
79-
run_tensors = [t for t in tensors if t.key not in self._executed_tensors]
78+
# those executed tileables should fetch data directly, submit the others
79+
run_tileables = [t for t in tileables if t.key not in self._executed_tileables]
8080

81-
graph = build_graph(run_tensors, executed_keys=list(self._executed_tensors.keys()))
82-
targets = [t.key for t in run_tensors]
81+
graph = build_graph(run_tileables, executed_keys=list(self._executed_tileables.keys()))
82+
targets = [t.key for t in run_tileables]
8383
graph_key = uuid.uuid4()
8484

8585
# submit graph to local cluster
@@ -100,40 +100,40 @@ def run(self, *tensors, **kw):
100100
if 0 < timeout < time.time() - exec_start_time:
101101
raise TimeoutError
102102

103-
for t in tensors:
104-
self._set_tensor_graph_key(t, graph_key)
103+
for t in tileables:
104+
self._set_tileable_graph_key(t, graph_key)
105105

106106
if not fetch:
107107
return
108108
else:
109-
return self.fetch(*tensors)
109+
return self.fetch(*tileables)
110110

111-
def fetch(self, *tensors):
111+
def fetch(self, *tileables):
112112
futures = []
113-
for tensor in tensors:
114-
key = tensor.key
113+
for tileable in tileables:
114+
key = tileable.key
115115

116-
if key not in self._executed_tensors:
117-
raise ValueError('Cannot fetch the unexecuted tensor')
116+
if key not in self._executed_tileables:
117+
raise ValueError('Cannot fetch the unexecuted tileable')
118118

119-
graph_key = self._get_tensor_graph_key(tensor.key)
119+
graph_key = self._get_tileable_graph_key(tileable.key)
120120
compressions = dataserializer.get_supported_compressions()
121121
future = self._api.fetch_data(self._session_id, graph_key, key, compressions, wait=False)
122122
futures.append(future)
123123
return [dataserializer.loads(f.result()) for f in futures]
124124

125125
def decref(self, *keys):
126-
for tensor_key, tensor_id in keys:
127-
if tensor_key not in self._executed_tensors:
126+
for tileable_key, tileable_id in keys:
127+
if tileable_key not in self._executed_tileables:
128128
continue
129-
graph_key, ids = self._executed_tensors[tensor_key]
130-
if tensor_id in ids:
131-
ids.remove(tensor_id)
132-
# for those same key tensors, do decref only when all those tensors are garbage collected
129+
graph_key, ids = self._executed_tileables[tileable_key]
130+
if tileable_id in ids:
131+
ids.remove(tileable_id)
132+
# for those same key tileables, do decref only when all those tileables are garbage collected
133133
if len(ids) != 0:
134134
continue
135-
self._api.delete_data(self._session_id, graph_key, tensor_key)
136-
del self._executed_tensors[tensor_key]
135+
self._api.delete_data(self._session_id, graph_key, tileable_key)
136+
del self._executed_tileables[tileable_key]
137137

138138
def __enter__(self):
139139
return self

0 commit comments

Comments
 (0)