Skip to content

Cache engine for reticulate using dill #1210

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 29 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
228624b
added unit tests that cover knitr #1505
tmastny Feb 26, 2018
b52ecaa
added cache_eng_python to add Python session caching between chunks. …
tmastny Feb 26, 2018
c345ce2
dill caching engine for knitr, with tests
tmastny Feb 28, 2018
ff39889
changes from feedback on knitr #1518 with updated tests
tmastny Apr 18, 2018
8e07779
fixed testing utils source in dill tests
tmastny Apr 19, 2018
638a4e7
Merge 'rstudio/main' with 'tmastny/master' into branch 'cache-engine'
leogama May 13, 2022
9753870
cache engine: update 'r' object identification logic
leogama Apr 14, 2022
02c1771
fix 'cache_path' when 'output.dir' is different from 'knitr:::input_d…
leogama Apr 20, 2022
dbebab3
cache loading should run in the input directory
leogama Apr 20, 2022
bd29f84
remove duplicated conversion functions
leogama May 26, 2022
f8497a0
Merge branch 'main' into cache-engine
leogama Sep 3, 2022
fe4cd9f
remove trailing whitespaces and empty line
leogama Sep 3, 2022
5d6f7a7
First version of cache implementation with new knitr API
leogama Sep 13, 2022
a33ed39
Expose the cache$available() method to knitr
leogama Sep 14, 2022
266463c
Use the same warning for missing and old dill module cases
leogama Sep 15, 2022
445a5ca
Set environment() as default argument in eng_python_initialize()
leogama Sep 15, 2022
c6a88ad
Basic test for knitr engine cache
leogama Sep 17, 2022
975c1b0
minor
leogama Sep 17, 2022
401b1ba
Workflows: install module dill in the testing virtualenv
leogama Sep 17, 2022
62c77d8
Docs: remove @params from cache_eng_python, add it to pkgdown index
leogama Sep 17, 2022
f487b52
Correctly initialize Python in knitr, honoring 'engine.path'
leogama Sep 19, 2022
cb9ee1f
Implement the 'cache.vars' chunk option; some style changes
leogama Sep 19, 2022
7d4eeec
Remove unused 'envir' parameter from 'eng_python_initialize*' functions
leogama Sep 20, 2022
395627e
update cache engine docs
leogama Dec 13, 2022
55d1e03
cache: adapt code and tests to dill package v0.3.6
leogama Dec 13, 2022
d43b593
fix typo, update generated documentation
leogama Dec 13, 2022
f354f60
Workflow: use PR branch from knitr for testing
leogama Dec 16, 2022
38ef3ce
fix typo
leogama Dec 20, 2022
79b9732
Merge branch 'main' into cache-engine
t-kalinowski Jun 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Implement the 'cache.vars' chunk option; some style changes
  • Loading branch information
leogama committed Sep 19, 2022
commit cb9ee1f2477cd0a691fe76f0f887e135fef5b89c
47 changes: 31 additions & 16 deletions R/knitr-cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@
#'
#' @export
cache_eng_python <- (function() {
closure <- environment()
dill <- NULL

cache_path <- function(path) {
  # Cache files are stored next to the chunk's cache prefix as "<path>.pkl".
  extension <- "pkl"
  paste(path, extension, sep = ".")
}

check_cache_available <- function(options) {
MINIMUM_PYTHON_VERSION <- "3.7"
MINIMUM_DILL_VERSION <- "0.3.6"
Expand All @@ -29,10 +36,11 @@ cache_eng_python <- (function() {
}

# is the module 'dill' loadable and recent enough?
dill <- tryCatch(import("dill"), error = identity)
closure$dill <- tryCatch(import("dill"), error = identity)
if (!inherits(dill, "error")) {
dill_version <- as_numeric_version(dill$`__version__`)
if (dill_version >= MINIMUM_DILL_VERSION)
cache_initialize()
return(TRUE)
} else {
# handle non-import error
Expand All @@ -47,16 +55,15 @@ cache_eng_python <- (function() {
}

cache_available <- function(options) {
available <- knitr::opts_knit$get("reticulate.cache")
if (is.null(available)) {
available <- check_cache_available(options)
knitr::opts_knit$set(reticulate.cache = available)
}
available
if (is.null(closure$.cache_available))
closure$.cache_available <- check_cache_available(options)
.cache_available
}

cache_path <- function(path) {
paste(path, "pkl", sep=".")
cache_initialize <- function() {
  # Turn on the 'refimported' flag in the settings of the 'dill.session'
  # module, so imported objects are saved by reference when possible.
  session_module <- import("dill.session")
  session_module[["settings"]][["refimported"]] <- TRUE
}

cache_exists <- function(options) {
Expand All @@ -65,23 +72,31 @@ cache_eng_python <- (function() {

cache_load <- function(options) {
if (!cache_available(options)) return()
dill <- import("dill")
dill$load_module(filename = cache_path(options$hash), module = "__main__")
}

filter <- NULL
r_obj_filter <- function() {
if (is.null(filter)) {
filter <<- py_eval("lambda obj: obj.name == 'r' and type(obj.value) is __builtins__.__R__")
if (is.null(closure$.r_obj_filter)) {
expr <- "lambda obj: obj.name == 'r' and type(obj.value) is __builtins__.__R__"
closure$.r_obj_filter <- py_eval(expr)
}
filter
.r_obj_filter
}

cache_save <- function(options) {
if (!cache_available(options)) return()
dill <- import("dill")

# when only inclusion filters are specified, it works as an allowlist
if (!is.null(options$cache.vars)) {
exclude <- NULL # the R object won't be saved unless specified by cache.vars
include <- options$cache.vars
} else {
exclude <- r_obj_filter()
include <- NULL
}

tryCatch({
dill$dump_module(cache_path(options$hash), refimported = TRUE, exclude = r_obj_filter())
dill$dump_module(cache_path(options$hash), exclude = exclude, include = include)
}, error = function(e) {
cache_purge(options$hash)
stop(e)
Expand Down
18 changes: 18 additions & 0 deletions tests/testthat/resources/eng-reticulate-cache.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,21 @@ open('py_chunk_executed', 'w').close()
```{python, cache = FALSE}
print(x + 1)
```

The `cache.vars` chunk option may be used to select only a subset of variables
from the global environment to be cached.

```{python cache-vars, cache.vars = 'x'}
x = 42
spam = "Lovely SPAM! Wonderful SPAM!"
```

In a second execution of the document, `spam` should not be present, because it wasn't saved.

```{python, cache = FALSE}
def print_globals():
for name, value in globals().items():
if not name.startswith('__'):
print(name, ": ", value, sep="")
print_globals()
```
11 changes: 10 additions & 1 deletion tests/testthat/test-python-knitr-cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ test_that("An R Markdown document using reticulate can be rendered with cache fe
flag_file <- "py_chunk_executed"
rmd_prefix <- "eng-reticulate-cache"
rmd_file <- paste(rmd_prefix, "Rmd", sep=".")
cache_path <- paste(rmd_prefix, "cache", sep="_")

withr::with_dir("resources", local({
withr::defer({
unlink(flag_file)
unlink(paste(rmd_prefix, "cache", sep="_"), recursive = TRUE)
unlink(cache_path, recursive = TRUE)
})

# cache file is created
Expand All @@ -26,5 +27,13 @@ test_that("An R Markdown document using reticulate can be rendered with cache fe
output <- rmarkdown::render(rmd_file, quiet = TRUE)
expect_false(file.exists(flag_file))
expect_true(file.exists(output))

# the 'spam' variable should not be cached in the 'cache-vars' block
main <- import_main()
dill <- import("dill")
py_del_attr(main, "spam")
session_file <- Sys.glob(paste0(cache_path, "/*/cache-vars_*.pkl"))
dill$load_module(session_file)
expect_false("spam" %in% names(main))
}))
})