4521840 upgrade scala213 spark35 #562

Merged
merged 30 commits on Jun 2, 2025

Changes from all commits (30 commits)
6c3d267
Initial commits for Scala 2.13.16 and Spark 3.5.5 Upgrade
cbharadwajp Mar 19, 2025
6ec3605
Fix the unit test compilation
cbharadwajp Mar 19, 2025
9269f18
Update the upload-artifact version for github actions
cbharadwajp Mar 19, 2025
5c29c14
Install java and sbt before build the project
cbharadwajp Mar 20, 2025
37c64b6
Add the sbt repo details
cbharadwajp Mar 20, 2025
49c249b
Update the upload path for built artifact
cbharadwajp Mar 20, 2025
0951014
Fix the tests and upgrade download-artifacts for integration tests
cbharadwajp Mar 20, 2025
0e0542f
Fix the code coverage upload issue for test cases for integration tests
cbharadwajp Mar 24, 2025
8882083
Fix pipeline for codecov and run-integration tests
cbharadwajp Apr 1, 2025
4d1c3e5
Fix functional and performance testing folder with the upgrade of sca…
cbharadwajp Apr 1, 2025
6befdf4
Install docker for integration tests
cbharadwajp Apr 1, 2025
2a46983
Install docker for integration tests
cbharadwajp Apr 1, 2025
c9091a4
Install docker for integration tests
cbharadwajp Apr 1, 2025
2fe6bf2
Upgrade the codecov version and print the env
cbharadwajp Apr 1, 2025
e139b33
Debug codecov token
cbharadwajp Apr 2, 2025
f832d87
Revert debug codecov token
cbharadwajp Apr 2, 2025
caf2e3b
Set codecov token in env
cbharadwajp Apr 2, 2025
b7a13db
Set codecov token in env
cbharadwajp Apr 2, 2025
9b5d07f
Revert set codecov token in env
cbharadwajp Apr 2, 2025
48d7b13
Downgrade codecov action
cbharadwajp Apr 2, 2025
107ab5a
Update the codecov token
cbharadwajp Apr 2, 2025
d112f63
Comment the integration tests as bitnami supported is not available
cbharadwajp Apr 2, 2025
fbf00f7
Upgrade vertica jdbc driver to 24.4
cbharadwajp Apr 8, 2025
59d922e
Fix the clean up of staging directory for function test suite
cbharadwajp Apr 16, 2025
420ec22
Add cats-core dependency for Vertica 24.4 driver
cbharadwajp Apr 17, 2025
351c0eb
Update the README.md
cbharadwajp Apr 21, 2025
dd09e51
Update the README.md
cbharadwajp Apr 21, 2025
d9c4e5f
Fix the functional test case.
cbharadwajp Apr 21, 2025
8dafd9e
Upload fat and slim jar to artifacts
cbharadwajp May 23, 2025
d5847fb
Incorporate review comments
cbharadwajp May 23, 2025
123 changes: 93 additions & 30 deletions .github/workflows/main.yml
@@ -11,59 +11,122 @@ jobs:
steps:
- name: Checkout the project
uses: actions/checkout@v2
- name: Set up JDK 11
uses: actions/setup-java@v4
with:
java-version: '11'
distribution: 'adopt'
- name: Install sbt
run: |
echo "Installing sbt..."
echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /etc/apt/sources.list.d/sbt.list
curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" | sudo apt-key add -
sudo apt-get update
sudo apt-get install -y sbt
- name: Build the project
run: cd connector && sbt package
run: cd connector && sbt "set test in assembly := {}" clean assembly package
- name: Upload the build artifact
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: build-jar-file
path: /home/runner/work/spark-connector/spark-connector/connector/target/scala-2.12/spark-vertica-connector_2.12-*.jar
path: /home/runner/work/spark-connector/spark-connector/connector/target/scala-2.13/spark-vertica-connector*.jar
run-analysis:
runs-on: ubuntu-latest
needs: build
steps:
- name: Checkout the project
uses: actions/checkout@v2
- name: Set up JDK 11
uses: actions/setup-java@v4
with:
java-version: '11'
distribution: 'adopt'
- name: Install sbt
run: |
echo "Installing sbt..."
echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /etc/apt/sources.list.d/sbt.list
curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" | sudo apt-key add -
sudo apt-get update
sudo apt-get install -y sbt
- name: Run scalastyle
run: cd connector && sbt scalastyle
run-unit-tests:
runs-on: ubuntu-latest
needs: build
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
steps:
- name: Checkout the project
uses: actions/checkout@v2
- name: Set up JDK 11
uses: actions/setup-java@v4
with:
java-version: '11'
distribution: 'adopt'
- name: Install sbt
run: |
echo "Installing sbt..."
echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /etc/apt/sources.list.d/sbt.list
curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" | sudo apt-key add -
sudo apt-get update
sudo apt-get install -y sbt
- name: Run unit tests
run: cd connector && sbt coverage test coverageReport
- name: Print environment variables
run: env
- name: Upload coverage report to codecov
uses: codecov/codecov-action@v2
uses: codecov/codecov-action@v3
with:
token: 896d4ba0-3e73-4869-afef-8203e4e2fde3
flags: unittests
name: codecov-umbrella
fail_ci_if_error: true
verbose: true
run-integration-tests:
runs-on: ubuntu-latest
needs: [build]
env:
VERTICA_VERSION: 12.0.3-0
steps:
- name: Checkout the project
uses: actions/checkout@v2
- name: Run docker compose
run: cd docker && docker-compose up -d
- name: Download the build artifact
uses: actions/download-artifact@v2
with:
name: build-jar-file
path: ./functional-tests/lib/
- name: Wait for Vertica to be available
uses: nick-invision/retry@v2
with:
timeout_seconds: 20
max_attempts: 10
retry_on: error
command: docker logs docker_vertica_1 | grep "Vertica container is now running" >/dev/null
- name: Run the integration tests
run: docker exec -w /spark-connector/functional-tests docker_client_1 sbt run
- name: Remove docker containers
run: cd docker && docker-compose down
#run-integration-tests:
# runs-on: ubuntu-latest
# needs: [build]
# env:
# VERTICA_VERSION: 12.0.3-0
# steps:
# - name: Checkout the project
# uses: actions/checkout@v2
# - name: Set up JDK 11
# uses: actions/setup-java@v4
# with:
# java-version: '11'
# distribution: 'adopt'
# - name: Set up Docker Buildx
# uses: docker/setup-buildx-action@v3
# - name: Install Docker Compose
# run: |
# sudo curl -L "https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
# sudo chmod +x /usr/local/bin/docker-compose
# - name: Check Docker version
# run: docker --version
# - name: Check Docker Compose version
# run: docker compose version
# - name: Install sbt
# run: |
# echo "Installing sbt..."
# echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /etc/apt/sources.list.d/sbt.list
# curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" | sudo apt-key add -
# sudo apt-get update
# sudo apt-get install -y sbt
# - name: Run docker compose
# run: cd docker && docker-compose up -d
# - name: Download the build artifact
# uses: actions/download-artifact@v4
# with:
# name: build-jar-file
# path: ./functional-tests/lib/
# - name: Wait for Vertica to be available
# uses: nick-invision/retry@v2
# with:
# timeout_seconds: 20
# max_attempts: 10
# retry_on: error
# command: docker logs docker_vertica_1 | grep "Vertica container is now running" >/dev/null
# - name: Run the integration tests
# run: docker exec -w /spark-connector/functional-tests docker_client_1 sbt run
# - name: Remove docker containers
# run: cd docker && docker-compose down
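The build step above now runs `sbt "set test in assembly := {}" clean assembly package`, so a single invocation produces both the fat (assembly) and slim (package) jars that are then uploaded from the scala-2.13 target directory with actions/upload-artifact@v4. A minimal build.sbt sketch of the same override, expressed as a setting instead of a command-line `set` (illustrative only; it assumes the sbt-assembly plugin already pinned in connector/project/plugins.sbt):

```scala
// connector/build.sbt (sketch): skip running tests when building the fat jar,
// equivalent to the inline `set test in assembly := {}` used in the workflow.
// Requires the sbt-assembly plugin to be enabled in project/plugins.sbt.
assembly / test := {}
```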
2 changes: 1 addition & 1 deletion .github/workflows/nightly.yml
@@ -15,7 +15,7 @@ jobs:
- name: Build the project
run: cd connector && sbt package
- name: Upload the build artifact
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: build-jar-file
path: /home/runner/work/spark-connector/spark-connector/connector/target/scala-2.12/spark-vertica-connector_2.12-*.jar
2 changes: 1 addition & 1 deletion .github/workflows/weekly.yml
@@ -22,7 +22,7 @@ jobs:
- name: Build the project
run: cd connector && sbt package
- name: Upload the build artifact
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: build-jar-file-${{ matrix.jdk }}
path: /home/runner/work/spark-connector/spark-connector/connector/target/scala-2.12/spark-vertica-connector_2.12-*.jar
10 changes: 5 additions & 5 deletions README.md
@@ -10,8 +10,8 @@
[![contributing](https://img.shields.io/badge/contributing-read-orange)](CONTRIBUTING.md)
[![license](https://img.shields.io/badge/license-Apache%202.0-orange.svg)](https://opensource.org/licenses/Apache-2.0)

![vertica-tested](https://img.shields.io/badge/Vertica%20Tested-10%20%7C%2011%20%7C%2012-blue)
![spark-tested](https://img.shields.io/badge/Spark%20Tested-3.0%20%7C%203.1%20%7C%203.2%20%7C%203.3-blue)
![vertica-tested](https://img.shields.io/badge/Vertica%20Tested-10%20%7C%2011%20%7C%2012%20%7C%2024-blue)
![spark-tested](https://img.shields.io/badge/Spark%20Tested-3.0%20%7C%203.1%20%7C%203.2%20%7C%203.3%20%7C%203.5-blue)

## Overview

@@ -35,7 +35,7 @@ The connector creates a JDBC connection to Vertica in order to manage the proces

To get started with using the connector, we'll need to make sure all the prerequisites are in place. These are:
- Vertica (10.1.1-0 or higher)
- Spark (3.0.0 or higher)
- Spark (3.5.5 or higher)
- An HDFS cluster or HDFS-compatible filesystem (S3, Google Cloud Storage, etc), for use as an intermediary between Spark and Vertica
- A Spark application, either running locally for quick testing, or running on a Spark cluster. If using S3, Spark must be using hadoop 3.3

@@ -49,7 +49,7 @@ The connector has been tested against Vertica 10.1.1-0 and higher.

### Spark

The connector requires Spark 3.0.0 or higher.
The connector requires Spark 3.5.5 or higher.

There are several examples of Spark programs that use this connector in the [examples](/examples) directory.

@@ -79,7 +79,7 @@ The connector requires Java 8 (8u92 or later) or Java 11.

### Scala

For the Spark Connector, Spark 3.0.0 and above use Scala 2.12. You will need to use a compatible Scala version (2.12.x).
For the Spark Connector, Spark 3.5.5 and above use Scala 2.13. You will need to use a compatible Scala version (2.13.x).

### Intermediary Storage

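The README changes above raise the tested and required versions to Spark 3.5.x and Scala 2.13.x. For an application picking up the upgraded connector, a hedged build.sbt sketch follows; the artifact coordinates are inferred from connector/build.sbt in this PR (organization `com.vertica`, name `spark-vertica-connector`) and the version is a placeholder, so verify both against the actual release before depending on them:

```scala
// Application-side build.sbt sketch for the Scala 2.13 / Spark 3.5 connector.
scalaVersion := "2.13.16"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-sql" % "3.5.5" % Provided,
  // Coordinates and version below are assumptions; check the published artifact.
  "com.vertica" %% "spark-vertica-connector" % "<connector-version>"
)
```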
27 changes: 14 additions & 13 deletions connector/build.sbt
@@ -20,24 +20,25 @@ versionProps := {
prop
}

scalaVersion := "2.12.12"
scalaVersion := "2.13.16"
name := "spark-vertica-connector"
organization := "com.vertica"
version := versionProps.value.getProperty("connector-version")

resolvers += "Artima Maven Repository" at "https://repo.artima.com/releases"
resolvers += "jitpack" at "https://jitpack.io"

libraryDependencies += "org.scala-lang.modules" %% "scala-parser-combinators" % "1.1.2"
libraryDependencies += "com.vertica.jdbc" % "vertica-jdbc" % "11.0.2-0"
libraryDependencies += "org.apache.spark" %% "spark-core" % "3.3.0"
libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.3.0"
libraryDependencies += "org.apache.hadoop" % "hadoop-hdfs" % "3.3.2"
libraryDependencies += "org.scalactic" %% "scalactic" % "3.2.2" % Test
libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.2" % "test"
libraryDependencies += "com.typesafe.scala-logging" %% "scala-logging" % "3.9.2"
libraryDependencies += "org.scalamock" %% "scalamock" % "4.4.0" % Test
libraryDependencies += "org.typelevel" %% "cats-core" % "2.1.1"
libraryDependencies += "org.scala-lang.modules" %% "scala-parser-combinators" % "2.3.0"
libraryDependencies += "com.vertica.jdbc" % "vertica-jdbc" % "24.4.0-0"
libraryDependencies += "org.typelevel" %% "cats-core" % "2.13.0"
libraryDependencies += "org.apache.spark" %% "spark-core" % "3.5.5"
libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.5.5"
libraryDependencies += "org.apache.hadoop" % "hadoop-hdfs" % "3.3.4"
libraryDependencies += "org.scalactic" %% "scalactic" % "3.2.16" % Test
libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.16" % "test"
libraryDependencies += "com.typesafe.scala-logging" %% "scala-logging" % "3.9.5"
libraryDependencies += "org.scalamock" %% "scalamock" % "5.2.0" % Test
libraryDependencies += "org.typelevel" %% "cats-core" % "2.10.0"
// Hadoop's jersey-server conflicts with Spark's and can cause a Spark UI issue
excludeDependencies += ExclusionRule("com.sun.jersey", "jersey-server")
Test / parallelExecution := false
@@ -54,10 +55,10 @@ sonarProperties ++= Map(
"sonar.host.url" -> "http://localhost:80",
)

ThisBuild / scapegoatVersion := "1.3.3"
ThisBuild / scapegoatVersion := "3.1.4"
scapegoatReports := Seq("xml")
Scapegoat / scalacOptions += "-P:scapegoat:overrideLevels:all=Warning"
scalacOptions += "-Ypartial-unification"
//scalacOptions += "-Ypartial-unification"
scalacOptions += "-Ywarn-value-discard"

scalastyleFailOnError := true
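Commenting out `-Ypartial-unification` is expected with this upgrade: the flag no longer exists in Scala 2.13 because the behaviour it enabled is the default there. A small illustrative sketch of the kind of cats-based code that needed the flag under 2.12 and now compiles without it (assumes cats-core on the classpath, as in this build):

```scala
import cats.Functor
import cats.syntax.functor._

// Generic over any Functor.
def bump[F[_]: Functor](fa: F[Int]): F[Int] = fa.map(_ + 1)

// Under Scala 2.12 this call only type-checked with -Ypartial-unification,
// which let the compiler infer F[_] as Either[String, *]; Scala 2.13 performs
// that unification by default, so the flag is dropped from scalacOptions.
val bumped: Either[String, Int] = bump(Right(41): Either[String, Int])
```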
2 changes: 1 addition & 1 deletion connector/project/build.properties
@@ -1 +1 @@
sbt.version=1.5.2
sbt.version=1.5.5
2 changes: 1 addition & 1 deletion connector/project/plugins.sbt
@@ -1,7 +1,7 @@
resolvers += "Artima Maven Repository" at "https://repo.artima.com/releases"

addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.15.0")
addSbtPlugin("com.artima.supersafe" % "sbtplugin" % "1.1.10")
addSbtPlugin("com.artima.supersafe" % "sbtplugin" % "1.1.12")
addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.6.1")
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.2")
addSbtPlugin("com.typesafe.sbt" % "sbt-native-packager" % "1.8.1")
@@ -36,8 +36,8 @@ import org.apache.parquet.io.api.RecordMaterializer
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport
import org.apache.spark.sql.execution.datasources.parquet.vertica.ParquetReadSupport
import org.apache.spark.sql.internal.LegacyBehaviorPolicy
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
import org.apache.spark.sql.types.StructType

import java.util
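The import swap above (and in several of the copied parquet sources below) follows the Spark 3.5 layout, where `LegacyBehaviorPolicy` is assumed to be a standalone enumeration under `org.apache.spark.sql.internal` rather than a member of `SQLConf`. A small usage sketch under that assumption; the values themselves are unchanged:

```scala
import org.apache.spark.sql.internal.LegacyBehaviorPolicy

// Only the import location changes; EXCEPTION, LEGACY and CORRECTED remain.
val rebaseMode: LegacyBehaviorPolicy.Value = LegacyBehaviorPolicy.CORRECTED
val isLegacy: Boolean = rebaseMode == LegacyBehaviorPolicy.LEGACY
```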
@@ -18,6 +18,7 @@ import com.vertica.spark.datasource.partitions.file.{PartitionedFileIdentity, Ve
import org.apache.spark.sql.connector.read.{Batch, InputPartition, PartitionReaderFactory, Scan}
import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile}
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.SerializableConfiguration

/**
* Wraps a [[Scan]] so that it will create a [[PartitionReaderWrapperFactory]]
@@ -37,10 +38,10 @@ class VerticaScanWrapper(val scan: Scan, val config: ReadConfig) extends Scan wi
def makeFilesIdentity(files: Array[PartitionedFile]): Array[PartitionedFileIdentity] = {
// Record each files to the count and create each an identity
files.map(file => {
val key = file.filePath
val key = file.filePath.toString
val count = partitioningCounts.getOrElse(key, 0)
partitioningCounts.put(key, count + 1)
PartitionedFileIdentity(file.filePath, file.start)
PartitionedFileIdentity(key, file.start)
})
}

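The added `.toString` tracks a Spark API change: in the Spark 3.5 line, `PartitionedFile.filePath` returns a `SparkPath` value rather than a plain `String`, so both the counting key and `PartitionedFileIdentity` now take its rendered string form. A minimal sketch of the accessor as this code uses it (Spark 3.5 assumed):

```scala
import org.apache.spark.sql.execution.datasources.PartitionedFile

// filePath is a SparkPath in recent Spark releases; toString yields the
// string form used here as the per-file identity key.
def fileKey(file: PartitionedFile): String = file.filePath.toString
```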
@@ -33,7 +33,7 @@ import org.apache.parquet.schema.Type.Repetition
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
import org.apache.spark.sql.internal.LegacyBehaviorPolicy
import org.apache.spark.sql.types._

/**
@@ -24,7 +24,7 @@ import org.apache.parquet.io.api.{GroupConverter, RecordMaterializer}
import org.apache.parquet.schema.MessageType

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
import org.apache.spark.sql.internal.LegacyBehaviorPolicy
import org.apache.spark.sql.types.StructType

/**
@@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, CaseInsensitiveMap, DateTimeUtils, GenericArrayData}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
import org.apache.spark.sql.internal.LegacyBehaviorPolicy
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String

@@ -255,13 +255,13 @@ private[parquet] class ParquetRowConverter(
case ByteType =>
new ParquetPrimitiveConverter(updater) {
override def addInt(value: Int): Unit =
updater.setByte(value.asInstanceOf[ByteType#InternalType])
updater.setByte(value.asInstanceOf[Byte])
}

case ShortType =>
new ParquetPrimitiveConverter(updater) {
override def addInt(value: Int): Unit =
updater.setShort(value.asInstanceOf[ShortType#InternalType])
updater.setShort(value.asInstanceOf[Short])
}

// For INT32 backed decimals
@@ -15,7 +15,7 @@ package org.apache.spark.sql.execution.datasources.parquet.vertica

import org.apache.spark.sql.catalyst.util.RebaseDateTime
import org.apache.spark.sql.execution.datasources.DataSourceUtils
import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
import org.apache.spark.sql.internal.LegacyBehaviorPolicy

/**
* Copied from Spark 3.2.0 DataSourceUtils implementation.
@@ -22,7 +22,7 @@ import com.vertica.spark.util.error._
import com.vertica.spark.util.error.ErrorHandling.{listToEitherSchema, ConnectorResult, SchemaResult}
import com.vertica.spark.util.query.{ColumnInfo, ColumnsTable, ComplexTypesTable, StringParsingUtils}
import com.vertica.spark.util.schema.ComplexTypesSchemaTools.{VERTICA_NATIVE_ARRAY_BASE_ID, VERTICA_SET_MAX_ID}
import org.apache.commons.lang.StringUtils
import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.types._

import java.sql.ResultSetMetaData
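The final change swaps Apache `commons-lang` for `commons-lang3`. For the common `StringUtils` helpers the lang3 API matches the older library, so in most call sites only the import changes. A small illustrative sketch (the example values are hypothetical):

```scala
import org.apache.commons.lang3.StringUtils

// Same helper names under the new package.
val inner: String  = StringUtils.substringBetween("array[int]", "[", "]") // "int"
val padded: String = StringUtils.leftPad("7", 3, '0')                     // "007"
```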