Skip to content

Commit 2f82cd8

Browse files
jonkeanezanmato1984
authored andcommitted
GH-46123: [C++] Undefined behavior in compare_internal.cc and light_array_internal.cc (#46124)
### Rationale for this change Removing undefined behaviors, adding a CI job for the M1-mac sanitizer now being run. ### What changes are included in this PR? The fixes + a sanitizer job that confirms they are fixed. ### Are these changes tested? Yup! ### Are there any user-facing changes? Fewer undefined behaviors. * GitHub Issue: #46123 Lead-authored-by: Jonathan Keane <[email protected]> Co-authored-by: Rossi Sun <[email protected]> Signed-off-by: Jonathan Keane <[email protected]>
1 parent af15118 commit 2f82cd8

File tree

7 files changed

+174
-28
lines changed

7 files changed

+174
-28
lines changed

ci/scripts/r_sanitize.sh

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -36,34 +36,38 @@ ncores=$(${R_BIN} -s -e 'cat(parallel::detectCores())')
3636
echo "MAKEFLAGS=-j${ncores}" >> ${rhome}/etc/Renviron.site
3737

3838
# build first so that any stray compiled files in r/src are ignored
39-
${R_BIN} CMD build .
40-
${R_BIN} CMD INSTALL ${INSTALL_ARGS} arrow*.tar.gz
39+
${R_BIN} CMD build --no-build-vignettes --no-manual .
4140

4241
# But unset the env var so that it doesn't cause us to run extra dev tests
4342
unset ARROW_R_DEV
4443

4544
# Set the testthat output to be verbose for easier debugging
4645
export ARROW_R_VERBOSE_TEST=TRUE
4746

48-
export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp"
47+
# We prune dependencies for these, so we need to disable forcing suggests
48+
export _R_CHECK_FORCE_SUGGESTS_=FALSE
49+
50+
export SUPPRESSION_FILE=$(readlink -f "tools/ubsan.supp")
51+
export UBSAN_OPTIONS="print_stacktrace=1,suppressions=${SUPPRESSION_FILE}"
4952
# From the old rhub image https://github.com/r-hub/rhub-linux-builders/blob/master/fedora-clang-devel-san/Dockerfile
5053
export ASAN_OPTIONS="alloc_dealloc_mismatch=0:detect_leaks=0:detect_odr_violation=0"
5154

52-
# run tests
53-
pushd tests
54-
${R_BIN} --no-save < testthat.R > testthat.out 2>&1 || { cat testthat.out; exit 1; }
55+
${R_BIN} CMD check --no-manual --no-vignettes --no-build-vignettes arrow*.tar.gz
5556

56-
cat testthat.out
57-
if grep -q "runtime error" testthat.out; then
57+
# Find sanitizer issues, print the file(s) they are part of, and fail the job
58+
find . -type f -name "*Rout" -exec grep -l "runtime error\|SUMMARY: UndefinedBehaviorSanitizer" {} \; > sanitizer_errors.txt
59+
if [ -s sanitizer_errors.txt ]; then
60+
echo "Sanitizer errors found in the following files:"
61+
cat sanitizer_errors.txt
62+
63+
# Print the content of files with errors for debugging
64+
while read -r file; do
65+
echo "=============== $file ==============="
66+
cat "$file"
67+
echo "========================================="
68+
done < sanitizer_errors.txt
69+
5870
exit 1
5971
fi
6072

61-
# run examples
62-
popd
63-
${R_BIN} --no-save -e 'library(arrow); testthat::test_examples(".")' >> examples.out 2>&1 || { cat examples.out; exit 1; }
64-
65-
cat examples.out
66-
if grep -q "runtime error" examples.out; then
67-
exit 1
68-
fi
6973
popd

cpp/src/arrow/compute/light_array_internal.cc

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -611,11 +611,9 @@ Status ExecBatchBuilder::AppendSelected(const std::shared_ptr<ArrayData>& source
611611
});
612612
Visit(source, num_rows_to_append - num_rows_to_process, row_ids + num_rows_to_process,
613613
[&](int i, const uint8_t* ptr, int32_t num_bytes) {
614-
uint64_t* dst = reinterpret_cast<uint64_t*>(
615-
target->mutable_data(2) +
616-
offsets[num_rows_before + num_rows_to_process + i]);
617-
const uint64_t* src = reinterpret_cast<const uint64_t*>(ptr);
618-
memcpy(dst, src, num_bytes);
614+
auto dst = target->mutable_data(2) +
615+
offsets[num_rows_before + num_rows_to_process + i];
616+
memcpy(dst, ptr, num_bytes);
619617
});
620618
}
621619

cpp/src/arrow/compute/row/compare_internal.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,11 @@ void KeyCompare::CompareVarBinaryColumnToRowHelper(
275275
int32_t tail_length = length - j * 8;
276276
uint64_t tail_mask = ~0ULL >> (64 - 8 * tail_length);
277277
uint64_t key_left = 0;
278-
std::memcpy(&key_left, key_left_ptr + j, tail_length);
278+
// NOTE: UBSAN may falsely report "misaligned load" in `std::memcpy` on some
279+
// platforms when using 64-bit pointers. Cast to an 8-bit pointer to work around
280+
// this.
281+
const uint8_t* src_bytes = reinterpret_cast<const uint8_t*>(key_left_ptr + j);
282+
std::memcpy(&key_left, src_bytes, tail_length);
279283
uint64_t key_right = key_right_ptr[j];
280284
result_or |= tail_mask & (key_left ^ key_right);
281285
}

dev/tasks/docker-tests/github.linux.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,11 @@ jobs:
6666
uses: actions/upload-artifact@v4
6767
with:
6868
name: test-output
69-
path: arrow/r/check/arrow.Rcheck/tests/testthat.Rout*
69+
path: |
70+
arrow/r/tests/
71+
arrow/r/arrow.Rcheck/
72+
!arrow/r/arrow.Rcheck/00_pkg_src/
73+
!arrow/r/arrow.Rcheck/arrow/
7074
if-no-files-found: ignore
7175

7276
{% if arrow.is_default_branch() %}

dev/tasks/r/github.macos.m1san.yml

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
{% import 'macros.jinja' as macros with context %}
19+
20+
{{ macros.github_header() }}
21+
22+
jobs:
23+
macos-cran:
24+
name: "m1-san on macOS"
25+
runs-on: macOS-15
26+
strategy:
27+
fail-fast: false
28+
29+
steps:
30+
{{ macros.github_checkout_arrow()|indent }}
31+
32+
- name: Configure dependencies (macos)
33+
run: |
34+
brew install openssl
35+
- uses: r-lib/actions/setup-r@v2
36+
with:
37+
use-public-rspm: true
38+
r-version: devel
39+
- name: Install dependencies
40+
uses: r-lib/actions/setup-r-dependencies@v2
41+
with:
42+
cache: false # cache does not work on across branches
43+
working-directory: arrow/r
44+
extra-packages: |
45+
any::rcmdcheck
46+
any::sys
47+
- name: Setup the sanitizer
48+
run: |
49+
# From rhub/rhub's m1-san:
50+
# https://github.com/r-hub/actions/blob/ab9b2fb5021f43bfd0ee977932e0a14d25f6e59e/run-check/scripts/mac-asan.sh
51+
# Use XCode 16.3 ------------------------------------------------
52+
sudo rm -f /Applications/Xcode.app
53+
sudo ln -sfF /Applications/Xcode_16.3.app /Applications/Xcode.app
54+
sudo xcode-select -s /Applications/Xcode.app
55+
56+
# Compile with sanitizers ---------------------------------------
57+
mkdir -p ~/.R
58+
cat >> ~/.R/Makevars <<EOF
59+
CC+=-mmacos-version-min=15 -fsanitize=address,undefined
60+
CXX+=-mmacos-version-min=15 -fsanitize=address,undefined
61+
CXX11+=-mmacos-version-min=15 -fsanitize=address,undefined
62+
CXX14+=-mmacos-version-min=15 -fsanitize=address,undefined
63+
CXX17+=-mmacos-version-min=15 -fsanitize=address,undefined
64+
CXX20+=-mmacos-version-min=15 -fsanitize=address,undefined
65+
CXX23+=-mmacos-version-min=15 -fsanitize=address,undefined
66+
REC_INSTALL_OPT=--dsym
67+
EOF
68+
69+
# Need to patch the shell script, because these env vars --------
70+
# do not go through a shell because of Apple security.
71+
R=$(readlink `which R`)
72+
head -1 ${R} >> /tmp/R
73+
cat >> /tmp/R <<EOF
74+
export DYLD_FORCE_FLAT_NAMESPACE=1
75+
export DYLD_INSERT_LIBRARIES=/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/17/lib/darwin/libclang_rt.asan_osx_dynamic.dylib
76+
EOF
77+
cat ${R} >> /tmp/R
78+
chmod +x /tmp/R
79+
sudo mv /tmp/R ${R}
80+
- name: Install and check
81+
env:
82+
R_BIN: R
83+
LIBARROW_BINARY: FALSE
84+
run: |
85+
export ARROW_SOURCE_HOME=$(pwd)/arrow
86+
export INSTALL_ARGS="--no-test-load"
87+
arrow/ci/scripts/r_sanitize.sh arrow
88+
- name: Save the test output
89+
uses: actions/upload-artifact@v4
90+
with:
91+
name: test-output
92+
path: |
93+
arrow/r/tests/
94+
arrow/r/arrow.Rcheck/
95+
!arrow/r/arrow.Rcheck/00_pkg_src/
96+
!arrow/r/arrow.Rcheck/arrow/
97+
if: always()

dev/tasks/tasks.yml

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1087,13 +1087,25 @@ tasks:
10871087
image: ubuntu-r-sanitizer
10881088
timeout: 120
10891089

1090-
test-r-clang-sanitizer:
1090+
test-r-clang-asan:
10911091
ci: github
10921092
template: docker-tests/github.linux.yml
10931093
params:
10941094
env:
10951095
R_PRUNE_DEPS: TRUE
1096-
image: r-clang-sanitizer
1096+
image: r-clang-asan
1097+
1098+
test-r-clang-ubsan:
1099+
ci: github
1100+
template: docker-tests/github.linux.yml
1101+
params:
1102+
env:
1103+
R_PRUNE_DEPS: TRUE
1104+
image: r-clang-ubsan
1105+
1106+
test-r-m1-san:
1107+
ci: github
1108+
template: r/github.macos.m1san.yml
10971109

10981110
# be sure to update binary-task.rb when upgrading Debian
10991111
test-debian-12-docs:

docker-compose.yml

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,8 @@ x-hierarchy:
162162
- ubuntu-r-valgrind
163163
- ubuntu-swift
164164
- ubuntu-verify-rc
165-
- r-clang-sanitizer
165+
- r-clang-asan
166+
- r-clang-ubsan
166167
- r
167168
- r-revdepcheck
168169
# helper services
@@ -1717,7 +1718,7 @@ services:
17171718
/bin/bash -c "
17181719
/arrow/ci/scripts/r_sanitize.sh /arrow"
17191720
1720-
r-clang-sanitizer:
1721+
r-clang-asan:
17211722
image: ${REPO}:r-rhub-clang-devel-latest
17221723
build:
17231724
context: .
@@ -1737,7 +1738,33 @@ services:
17371738
LIBARROW_DOWNLOAD: "false"
17381739
ARROW_SOURCE_HOME: "/arrow"
17391740
ARROW_R_DEV: ${ARROW_R_DEV}
1740-
# To test for CRAN release, delete ^^ these two env vars so we download the Apache release
1741+
ARROW_USE_PKG_CONFIG: "false"
1742+
volumes:
1743+
- .:/arrow:delegated
1744+
command: >
1745+
/bin/bash -c "
1746+
/arrow/ci/scripts/r_sanitize.sh /arrow"
1747+
1748+
r-clang-ubsan:
1749+
image: ${REPO}:r-rhub-clang-ubsan-devel-latest
1750+
build:
1751+
context: .
1752+
dockerfile: ci/docker/linux-r.dockerfile
1753+
cache_from:
1754+
- ${REPO}:r-rhub-clang-ubsan-devel-latest
1755+
args:
1756+
base: rhub/clang-ubsan
1757+
cmake: ${CMAKE}
1758+
r_dev: ${ARROW_R_DEV}
1759+
r_bin: R
1760+
tz: ${TZ}
1761+
r_prune_deps: ${R_PRUNE_DEPS}
1762+
shm_size: *shm-size
1763+
environment:
1764+
<<: *common
1765+
LIBARROW_DOWNLOAD: "false"
1766+
ARROW_SOURCE_HOME: "/arrow"
1767+
ARROW_R_DEV: ${ARROW_R_DEV}
17411768
ARROW_USE_PKG_CONFIG: "false"
17421769
volumes:
17431770
- .:/arrow:delegated

0 commit comments

Comments
 (0)