data-studios-datasources

RStudio

Example 1 - RNASeq

Note that the input file paths (tab-delimited text files) may be different for you.

# Adapted from source: https://combine-australia.github.io/RNAseq-R/09-applying-rnaseq-solutions.html

install.packages("BiocManager")
BiocManager::install(c("limma"))
BiocManager::install(c("edgeR"))
BiocManager::install(c("org.Dm.eg.db"))
BiocManager::install(c("gplots"))
BiocManager::install(c("RColorBrewer"))
library(limma)
library(edgeR)
library(gplots)
library(RColorBrewer)
library(org.Dm.eg.db)

# Read the count table and the sample annotation table (tab-delimited).
# The paths are specific to the demo environment; adjust them as needed.
counts <- read.delim(file="/workspace/data/datastudios-demo-rstudio/input/2024-05-13/counts_Drosophila.txt")
# Since R 4.0.0 read.delim() no longer converts strings to factors by default.
# Later steps index colour vectors with targets$Group / targets$Library and
# call levels() on them, which only works when those columns are factors, so
# the conversion is requested explicitly here.
targets <- read.delim(file="/workspace/data/datastudios-demo-rstudio/input/2024-05-13/SampleInfo_Drosophila.txt",
                      stringsAsFactors=TRUE)
head(counts)
targets
table(targets$Group)

# Counts per million for every sample (edgeR::cpm).
mycpm <- cpm(counts)

# Compare raw counts with CPM for the first sample; the reference lines mark
# a raw count of 10 (vertical) and a CPM of 2 (horizontal).
plot(counts[,1],mycpm[,1],xlim=c(0,20),ylim=c(0,5))
abline(v=10,col=2)
abline(h=2,col=4)

# Keep rows whose CPM exceeds 2 in at least 3 samples.
thresh <- mycpm > 2
keep <- rowSums(thresh) >= 3
table(keep)
counts.keep <- counts[keep,]
dim(counts.keep)

# Wrap the filtered counts in a DGEList and inspect the library sizes.
y <- DGEList(counts.keep)
barplot(y$samples$lib.size)

# --- Distribution of log2 CPM values per sample ---------------------------

# Single-panel layout for the first, uncoloured plot.
par(mfrow = c(1, 1))

# log2 counts per million computed from the filtered count matrix.
logcpm <- cpm(y$counts, log = TRUE)

# Plain boxplots (outliers hidden) with the overall median as a blue line.
boxplot(
  logcpm,
  xlab = "",
  ylab = "Log2 counts per million",
  las = 2,
  outline = FALSE
)
abline(h = median(logcpm), col = "blue")
title("Boxplots of logCPMs (unnormalised)")

# Two panels side by side, with extra outer margin at the bottom.
par(mfrow = c(1, 2), oma = c(2, 0, 0, 0))

# Panel 1: one colour per experimental group.
group.col <- c("red", "blue")[targets$Group]
boxplot(
  logcpm,
  xlab = "",
  ylab = "Log2 counts per million",
  las = 2,
  col = group.col,
  pars = list(cex.lab = 0.8, cex.axis = 0.8)
)
abline(h = median(logcpm), col = "blue")
title("Boxplots of logCPMs\n(coloured by groups)", cex.main = 0.8)

# Panel 2: one colour per library preparation.
lib.col <- c("light pink", "light green")[targets$Library]
boxplot(
  logcpm,
  xlab = "",
  ylab = "Log2 counts per million",
  las = 2,
  col = lib.col,
  pars = list(cex.lab = 0.8, cex.axis = 0.8)
)
abline(h = median(logcpm), col = "blue")
title("Boxplots of logCPMs\n(coloured by library prep)", cex.main = 0.8)

# --- MDS plots, coloured by group and by library prep ----------------------
# Note: levels() returns NULL for character columns, so the legends are only
# populated when targets$Group and targets$Library are factors.
par(mfrow = c(1, 2))

# Left panel: samples coloured by experimental group.
plotMDS(y, col = group.col)
legend(
  "topright",
  legend = levels(targets$Group),
  fill = c("red", "blue")
)

# Right panel: samples coloured by library preparation.
plotMDS(y, col = lib.col)
legend(
  "topleft",
  legend = levels(targets$Library),
  fill = c("light pink", "light green")
)

# --- Heatmap of the most variable genes ------------------------------------

# log2 CPM for the DGEList, then the per-gene (row-wise) variance.
logcounts <- cpm(y, log = TRUE)
var_genes <- apply(logcounts, 1, var)

# Names of the (up to) 500 genes with the highest variance.
# head() is used instead of [1:500] so that a dataset with fewer than 500
# genes yields a shorter vector rather than NA names, which would make the
# row subset below fail.
select_var <- head(names(sort(var_genes, decreasing = TRUE)), 500)

highly_variable_lcpm <- logcounts[select_var, ]
dim(highly_variable_lcpm)

# Interpolate the 11-colour RdYlBu palette to 50 shades and reverse it.
mypalette <- brewer.pal(11, "RdYlBu")
morecols <- colorRampPalette(mypalette)

# Row-scaled heatmap with a column side bar coloured by group.
heatmap.2(
  highly_variable_lcpm,
  col = rev(morecols(50)),
  trace = "none",
  main = "Top 500 most variable genes across samples",
  ColSideColors = group.col,
  scale = "row",
  margins = c(10, 5)
)

Example 2 - Shiny

install.packages("dplyr")
install.packages("gapminder")
install.packages("ggplot2")
install.packages("shiny")

library(shiny)
library(dplyr)
library(ggplot2)
library(gapminder)

# Host and port the Shiny application listens on
options(
  shiny.host = "0.0.0.0",
  shiny.port = 8180
)

# Page layout: a sidebar with the continent selector and a main panel holding
# the two charts rendered by the server.
ui <- fluidPage(
  sidebarLayout(
    sidebarPanel = sidebarPanel(
      h4("Gapminder Dashboard"),
      hr(),
      # Continent choices come straight from the gapminder dataset.
      selectInput(
        inputId = "inContinent",
        label = "Continent",
        choices = unique(gapminder$continent),
        selected = "Europe"
      )
    ),
    mainPanel = mainPanel(
      plotOutput(outputId = "outChartLifeExp"),
      plotOutput(outputId = "outChartGDP")
    )
  )
)

# Server logic for the Gapminder dashboard.
#
# Builds a reactive per-year summary for the continent chosen in
# input$inContinent and renders two charts from it: a bar chart of average
# life expectancy and a line chart of average GDP per capita.
server <- function(input, output, session) {
  # Per-year averages for the selected continent; re-evaluated whenever
  # input$inContinent changes.
  data <- reactive({
    gapminder %>%
      filter(continent == input$inContinent) %>%
      group_by(year) %>%
      summarise(
        AvgLifeExp = round(mean(lifeExp)),
        AvgGdpPercap = round(mean(gdpPercap), digits = 2)
      )
  })

  # Theme tweaks shared by both charts (centred bold title, larger axis text).
  chart_theme <- ggplot2::theme(
    plot.title = element_text(hjust = 0.5, size = 20, face = "bold"),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12)
  )

  # Bar chart of average life expectancy, with the value printed in each bar.
  output$outChartLifeExp <- renderPlot({
    ggplot(data(), aes(x = year, y = AvgLifeExp)) +
      geom_col(fill = "#0099f9") +
      geom_text(aes(label = AvgLifeExp), vjust = 2, size = 6, color = "#ffffff") +
      labs(title = paste("Average life expectancy in", input$inContinent)) +
      theme_classic() +
      chart_theme
  })

  # Line chart of average GDP per capita with labelled points.
  output$outChartGDP <- renderPlot({
    ggplot(data(), aes(x = year, y = AvgGdpPercap)) +
      # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0); using
      # `size` here triggers a deprecation warning.
      geom_line(color = "#f96000", linewidth = 2) +
      geom_point(color = "#f96000", size = 5) +
      geom_label(
        aes(label = AvgGdpPercap),
        nudge_x = 0.25,
        nudge_y = 0.25
      ) +
      labs(title = paste("Average GDP per capita in", input$inContinent)) +
      theme_classic() +
      chart_theme
  })
}

# Launch the application.
shinyApp(ui = ui, server = server)

Example 3 - Volcano Plot using Shiny

Note that the CSV file path may be different for you.

install.packages("shiny")
install.packages("plotly")
install.packages("tidyverse")

library(shiny)
library(plotly)
library(tidyverse)

# Page layout: the interactive volcano plot on the left (7 of 12 columns) and
# the table of selected probes on the right (5 of 12 columns).
ui <- fluidPage(
  titlePanel("Volcano Plotly"),
  fluidRow(
    column(
      7,
      plotlyOutput(outputId = "volcanoPlot", height = "500px")
    ),
    column(
      5,
      dataTableOutput(outputId = "selectedProbesTable")
    )
  )
)

# Location of the differential-expression results (adjust for your setup).
csv_file <- "/workspace/data/datastudios-demo-rstudio/input/2024-05-20_volcano-examples/NKI-DE-results.csv"

# Server logic for the volcano plot app.
#
# Loads the results once per session, draws them as an interactive volcano
# plot, and lists the probes selected in the plot in a table.
server <- function(input, output) {

  # Load the results, derive the plotting columns and keep a random sample of
  # 1000 rows.
  de_results <- sample_n(
    mutate(
      read.csv(csv_file, stringsAsFactors = FALSE),
      # Probes whose id starts with "Contig" are labelled EST, the rest mRNA.
      probe.type = factor(ifelse(grepl("^Contig", probe), "EST", "mRNA")),
      minusLog10Pvalue = -log10(adj.P.Val),
      # Hover text: "SYMBOL (probe)", or just the probe id when the symbol is NA.
      tooltip = ifelse(
        is.na(HUGO.gene.symbol),
        probe,
        paste0(HUGO.gene.symbol, " (", probe, ")")
      )
    ),
    1000
  )

  output$volcanoPlot <- renderPlotly({

    # Row names are passed as `key` so selection events can be mapped back to
    # rows of de_results.
    volcano <- ggplot(
      de_results,
      aes(
        x = logFC,
        y = minusLog10Pvalue,
        colour = probe.type,
        text = tooltip,
        key = row.names(de_results)
      )
    ) +
      geom_point() +
      xlab("log fold change") +
      ylab("-log10(P-value)")

    # Convert to plotly and make box/lasso selection the default drag mode.
    layout(ggplotly(volcano, tooltip = "tooltip"), dragmode = "select")
  })

  output$selectedProbesTable <- renderDataTable({

    selection <- event_data("plotly_selected")

    # Nothing selected yet -> zero-row frame with the same columns.
    selected_rows <- if (is.null(selection)) {
      slice(de_results, 0)
    } else {
      de_results[selection$key, ]
    }

    transmute(
      selected_rows,
      probe,
      gene = HUGO.gene.symbol,
      `log fold change` = signif(logFC, digits = 2),
      `p-value` = signif(adj.P.Val, digits = 2)
    )
  },
    options = list(dom = "tip", pageLength = 10, searching = FALSE)
  )
}

# Launch the application.
shinyApp(ui = ui, server = server, options = list(height = 600))

Example 4 - Load JSON lib, change working directory on startup, read CSV to data frame, export data frame to JSON file

Note that the CSV file path may be different for you.

install.packages("RJSONIO")
library(RJSONIO)
# Work relative to the mounted data directory.
setwd("/workspace/data")

# Load the CSV into a data frame.
df <- read.csv(file = "input/2024-01-16/polling_places.csv")

# Serialise the data frame to a JSON string (RJSONIO::toJSON) ...
exportJson <- toJSON(df)

# ... and write it out, followed by a newline.
cat(exportJson, file = "output/output.json", sep = "\n")

JupyterLab

Example 1 - Install additional packages

!pip install pandas[pyarrow] jupytext scipy jupyterlab-git qgrid seaborn nb_black

Example 2 - Read CSV to data frame, export data frame to JSON file

Note that the CSV file path may be different for you.

import pandas as pd

# Input CSV, relative to the notebook's working directory.
csv_path = '2024-01-16/polling_places.csv'

# low_memory=False parses the file in one pass instead of in chunks.
df = pd.read_csv(csv_path, low_memory=False)

# Rendered by Jupyter when this is the last expression in a cell.
df

# Write the data frame out as JSON.
df.to_json('output.json')

Example 3 - Use IGV

Add the following customization via the Conda packages field

# Conda customization: install ipyigv from the conda-forge channel.
channels:
  - conda-forge
dependencies:
  - ipyigv

Then, once the session has launched, run the following Python code:

from ipyigv import IgvBrowser as Browser, PUBLIC_GENOMES
from ipyigv.options import ReferenceGenome, Track

# Build the reference genome from the hg38 entry of ipyigv's public genomes.
hg38_settings = PUBLIC_GENOMES.hg38
genome = ReferenceGenome(**hg38_settings)

# Create the browser widget for that genome.
browser = Browser(genome=genome)

# Rendered by Jupyter when this is the last expression in a cell.
browser

You will see the following:

VSCode

TBD

Name		Name	Last commit message	Last commit date
Latest commit History 13 Commits
.gitignore		.gitignore
LICENSE		LICENSE
README.md		README.md

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Repository files navigation

data-studios-datasources

RStudio

Example 1 - RNASeq

Example 2 - Shiny

Example 3 - Volcano Plot using Shiny

Example 4 - Load JSON lib, change working directory on startup, read CSV to data frame, export data frame to JSON file

JupyterLab

Example 1 - Install additional packages

Example 2 - Read CSV to data frame, export data frame to JSON file

Example 3 - Use IGV

VSCode

About

Uh oh!

Releases

Packages

License

robnewman/data-studios-datasources

Folders and files

Latest commit

History

Repository files navigation

data-studios-datasources

RStudio

Example 1 - RNASeq

Example 2 - Shiny

Example 3 - Volcano Plot using Shiny

Example 4 - Load JSON lib, change working directory on startup, read CSV to data frame, export data frame to JSON file

JupyterLab

Example 1 - Install additional packages

Example 2 - Read CSV to data frame, export data frame to JSON file

Example 3 - Use IGV

VSCode

About

Resources

License

Uh oh!

Stars

Watchers

Forks

Releases

Packages 0

Packages