@@ -116,8 +116,14 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with
116
116
}
117
117
118
118
override def refreshTable(databaseName: String, tableName: String): Unit = {
  // refreshTable does not eagerly reload the cache. It just invalidates the cache.
  // Next time when we use the table, it will be populated in the cache.
  // Since we also cache ParquetRelations converted from Hive Parquet tables and
  // adding converted ParquetRelations into the cache is not defined in the load function
  // of the cache (instead, we add the cache entry in convertToParquetRelation),
  // it is better here to invalidate the cache to avoid confusing warning logs from the
  // cache loader (e.g. cannot find data source provider, which is only defined for
  // data source tables).
  invalidateTable(databaseName, tableName)
}
123
129
@@ -242,21 +248,27 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with
242
248
QualifiedTableName (metastoreRelation.databaseName, metastoreRelation.tableName)
243
249
244
250
def getCached (
245
- tableIdentifier : QualifiedTableName ,
246
- pathsInMetastore : Seq [String ],
247
- schemaInMetastore : StructType ,
248
- partitionSpecInMetastore : Option [PartitionSpec ]): Option [LogicalRelation ] = {
251
+ tableIdentifier : QualifiedTableName ,
252
+ pathsInMetastore : Seq [String ],
253
+ schemaInMetastore : StructType ,
254
+ partitionSpecInMetastore : Option [PartitionSpec ]): Option [LogicalRelation ] = {
249
255
cachedDataSourceTables.getIfPresent(tableIdentifier) match {
250
256
case null => None // Cache miss
251
- case logical @ LogicalRelation (parquetRelation : ParquetRelation2 ) =>
257
+ case logical@ LogicalRelation (parquetRelation : ParquetRelation2 ) =>
252
258
// If we have the same paths, same schema, and same partition spec,
253
259
// we will use the cached Parquet Relation.
254
260
val useCached =
255
- parquetRelation.paths == pathsInMetastore &&
261
+ parquetRelation.paths.toSet == pathsInMetastore.toSet &&
256
262
logical.schema.sameType(metastoreSchema) &&
257
263
parquetRelation.maybePartitionSpec == partitionSpecInMetastore
258
264
259
- if (useCached) Some (logical) else None
265
+ if (useCached) {
266
+ Some (logical)
267
+ } else {
268
+ // If the cached relation is not updated, we invalidate it right away.
269
+ cachedDataSourceTables.invalidate(tableIdentifier)
270
+ None
271
+ }
260
272
case other =>
261
273
logWarning(
262
274
s " ${metastoreRelation.databaseName}. ${metastoreRelation.tableName} shold be stored " +
0 commit comments