Skip to content

Commit 8204d63

Browse files
authored
Merge pull request tree-sitter#2411 from amaanq/fuzz-and-more
update fuzz script, some minor fixes & improvements, add asan to ci
2 parents 3f44b89 + 211e13d commit 8204d63

File tree

12 files changed

+54
-57
lines changed

12 files changed

+54
-57
lines changed

.github/workflows/sanitize.yml

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Sunitize
1+
name: Sanitize
22

33
env:
44
CARGO_TERM_COLOR: always
@@ -9,7 +9,7 @@ on:
99

1010
jobs:
1111
check_undefined_behaviour:
12-
name: Undefined behaviour checks
12+
name: Sanitizer checks
1313
runs-on: ubuntu-latest
1414
env:
1515
TREE_SITTER: ${{ github.workspace }}/target/release/tree-sitter
@@ -38,3 +38,12 @@ jobs:
3838
CFLAGS: -fsanitize=undefined
3939
RUSTFLAGS: -lubsan
4040
run: cargo test -- --test-threads 1
41+
42+
- name: Run main tests with address sanitizer (ASAN)
43+
env:
44+
CFLAGS: -fsanitize=address
45+
RUSTFLAGS: -Zsanitizer=address
46+
run: |
47+
rustup install nightly
48+
rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu
49+
cargo +nightly test -Z build-std --target x86_64-unknown-linux-gnu -- --test-threads 1

cli/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ version = "0.4.19"
7272
features = ["std"]
7373

7474
[dev-dependencies]
75-
proc_macro = { path = "src/tests/proc_macro", package = "tree-sitter-tests-proc-macro" }
75+
tree_sitter_proc_macro = { path = "src/tests/proc_macro", package = "tree-sitter-tests-proc-macro" }
7676

7777
rand = "0.8.5"
7878
tempfile = "3.6.0"

cli/src/generate/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ pub fn generate_parser_in_directory(
6161
None => {
6262
let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into());
6363
grammar_json = load_grammar_file(&grammar_js_path)?;
64-
fs::write(&src_path.join("grammar.json"), &grammar_json)?;
64+
fs::write(&src_path.join("grammar.json"), &grammar_json)
65+
.with_context(|| format!("Failed to write grammar.json to {:?}", src_path))?;
6566
}
6667
}
6768

cli/src/tests/corpus_test.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ use crate::{
1414
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
1515
util,
1616
};
17-
use proc_macro::test_with_seed;
1817
use std::{env, fs};
1918
use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
19+
use tree_sitter_proc_macro::test_with_seed;
2020

2121
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
2222
fn test_corpus_for_bash(seed: usize) {

cli/src/tests/helpers/fixtures.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@ pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) ->
8686

8787
pub fn get_test_grammar(name: &str) -> (String, Option<PathBuf>) {
8888
let dir = fixtures_dir().join("test_grammars").join(name);
89-
let grammar = fs::read_to_string(&dir.join("grammar.json")).unwrap();
89+
let grammar = fs::read_to_string(&dir.join("grammar.json")).expect(&format!(
90+
"Can't find grammar.json for test grammar {}",
91+
name
92+
));
9093
(grammar, Some(dir))
9194
}

cli/src/tests/parser_test.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ use crate::{
88
generate::generate_parser_for_grammar,
99
parse::{perform_edit, Edit},
1010
};
11-
use proc_macro::retry;
1211
use std::{
1312
sync::atomic::{AtomicUsize, Ordering},
1413
thread, time,
1514
};
1615
use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range};
16+
use tree_sitter_proc_macro::retry;
1717

1818
#[test]
1919
fn test_parsing_simple_string() {

lib/include/tree_sitter/api.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
extern "C" {
66
#endif
77

8-
#include <stdio.h>
98
#include <stdlib.h>
109
#include <stdint.h>
1110
#include <stdbool.h>

script/build-fuzzers

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,30 +6,30 @@ if [[ "$(uname -s)" != Linux ]]; then
66
exit 1
77
fi
88

9-
if [[ -z "$LIB_FUZZER_PATH" ]]; then
10-
echo "LIB_FUZZER_PATH not set"
11-
exit 1
12-
fi
13-
149
CC=${CC:-clang}
1510
CXX=${CXX:-clang++}
16-
LINK=${LINK:-clang++}
1711

1812
default_fuzz_flags="-fsanitize=fuzzer,address,undefined"
1913

2014
CFLAGS=${CFLAGS:-"$default_fuzz_flags"}
2115
CXXFLAGS=${CXXFLAGS:-"$default_fuzz_flags"}
2216

2317
export CFLAGS
24-
make
18+
make CC="$CC" CXX="$CXX"
2519

2620
if [ -z "$@" ]; then
2721
languages=$(ls test/fixtures/grammars)
2822
else
2923
languages="$@"
3024
fi
3125

26+
mkdir -p test/fuzz/out
27+
3228
for lang in ${languages[@]}; do
29+
# skip typescript
30+
if [[ $lang == "typescript" ]]; then
31+
continue
32+
fi
3333
echo "Building $lang fuzzer..."
3434
lang_dir="test/fixtures/grammars/$lang"
3535

@@ -54,19 +54,24 @@ for lang in ${languages[@]}; do
5454
highlights_filename="${lang_dir}/queries/highlights.scm"
5555
if [ -e "${highlights_filename}" ]; then
5656
ts_lang_query_filename="${lang}.scm"
57-
cp "${highlights_filename}" "out/${ts_lang_query_filename}"
57+
cp "${highlights_filename}" "test/fuzz/out/${ts_lang_query_filename}"
5858
else
5959
ts_lang_query_filename=""
6060
fi
6161

6262
# FIXME: We should extract the grammar name from grammar.js. For now, use the
6363
# name of the directory instead. Also, the grammar name needs to be a valid C
6464
# identifier, so replace any '-' characters with '_'.
65-
ts_lang="tree_sitter_$(echo $lang | tr -- - _)"
65+
ts_lang="tree_sitter_$(echo "$lang" | tr -- - _)"
6666
$CXX $CXXFLAGS -std=c++11 -I lib/include -D TS_LANG="$ts_lang" -D TS_LANG_QUERY_FILENAME="\"${ts_lang_query_filename}\"" \
6767
"test/fuzz/fuzzer.cc" "${objects[@]}" \
68-
libtree-sitter.a "$LIB_FUZZER_PATH" \
69-
-o "out/${lang}_fuzzer"
68+
libtree-sitter.a \
69+
-o "test/fuzz/out/${lang}_fuzzer"
7070

71-
python test/fuzz/gen-dict.py "${lang_dir}/src/grammar.json" > "out/$lang.dict"
71+
jq '
72+
[ ..
73+
| if .type? == "STRING" or (.type? == "ALIAS" and .named? == false) then .value else empty end
74+
| select(test("\\S") and length == utf8bytelength)
75+
] | unique | .[]
76+
' | sort
7277
done

script/run-fuzzer

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,13 @@ root=$(dirname "$0")/..
66
export ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1:symbolize=1"
77
export UBSAN="print_stacktrace=1:halt_on_error=1:symbolize=1"
88

9-
declare -A mode_config=( ["halt"]="-timeout=1 -rss_limit_mb=256" ["recover"]="-timeout=10 -rss_limit_mb=256" )
9+
# When CI is unset or empty, use the plain fuzzer options; when CI is set, also pass the 120-second time flags
10+
11+
if [ -z "${CI:-}" ]; then
12+
declare -A mode_config=( ["halt"]="-timeout=1 -rss_limit_mb=2048" ["recover"]="-timeout=10 -rss_limit_mb=2048" )
13+
else
14+
declare -A mode_config=( ["halt"]="-max_total_time=120 -timeout=1 -rss_limit_mb=2048" ["recover"]="-time=120 -timeout=10 -rss_limit_mb=2048" )
15+
fi
1016

1117
run_fuzzer() {
1218
if [ "$#" -lt 2 ]; then
@@ -21,15 +27,16 @@ run_fuzzer() {
2127
# Treat remainder of arguments as libFuzzer arguments
2228

2329
# Fuzzing logs and testcases are always written to `pwd`, so `cd` there first
24-
results="${root}/out/fuzz-results/${lang}_${mode}"
30+
results="${root}/test/fuzz/out/fuzz-results/${lang}"
2531
mkdir -p "${results}"
2632
cd "${results}"
2733

2834
# Create a corpus directory, so new discoveries are stored on disk. These will
2935
# then be loaded on subsequent fuzzing runs
3036
mkdir -p corpus
3137

32-
"../../${lang}_fuzzer_${mode}" "-dict=../../${lang}.dict" "-artifact_prefix=${lang}_${mode}_" -max_len=2048 ${mode_config[$mode]} "./corpus" "$@"
38+
pwd
39+
"../../${lang}_fuzzer" "-dict=../../${lang}.dict" "-artifact_prefix=${lang}_" -max_len=2048 "${mode_config[$mode]}" "./corpus" "$@"
3340
}
3441

3542
reproduce() {
@@ -46,7 +53,7 @@ reproduce() {
4653
shift
4754
# Treat remainder of arguments as libFuzzer arguments
4855

49-
"${root}/out/${lang}_fuzzer_${mode}" ${mode_config[$mode]} -runs=1 "${testcase}" "$@"
56+
"${root}/test/fuzz/out/${lang}_fuzzer" "${mode_config[$mode]}" -runs=1 "${testcase}" "$@"
5057
}
5158

5259
script=$(basename "$0")

tags/src/c_lib.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,11 @@ pub extern "C" fn ts_tagger_add_language(
8484
let tagger = unwrap_mut_ptr(this);
8585
let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) };
8686
let tags_query = unsafe { slice::from_raw_parts(tags_query, tags_query_len as usize) };
87-
let locals_query = unsafe { slice::from_raw_parts(locals_query, locals_query_len as usize) };
87+
let locals_query = if locals_query != std::ptr::null() {
88+
unsafe { slice::from_raw_parts(locals_query, locals_query_len as usize) }
89+
} else {
90+
&[]
91+
};
8892
let tags_query = match str::from_utf8(tags_query) {
8993
Ok(e) => e,
9094
Err(_) => return TSTagsError::InvalidUtf8,

0 commit comments

Comments
 (0)