library(MOCHA)
library(ArchR)
Optionally filter the ArchR project to a subset of samples.
# You should substitute this with your own ArchR project.
# You must have completed cell labeling with your ArchR project.
<- ArchR::loadArchRProject("/home/jupyter/FullCovid")
myArchRProj
# Define your annotation package for TxDb object(s)
# and genome-wide annotation.
# Here our samples are human using hg38 as a reference.
# For more info: <https://bioconductor.org/packages/3.15/data/annotation/>
library(TxDb.Hsapiens.UCSC.hg38.refGene)
library(org.Hs.eg.db)
<- TxDb.Hsapiens.UCSC.hg38.refGene
TxDb <- org.Hs.eg.db
Org
# Optional: Filter your ArchR project by sample.
# For our example we filter ArchR Project to three samples from
# each COVID Status (3 Positive, 3 Negative).
<- c( "B011-AP0C1W3", "B011-AP0C1W8", "B011-AP0C2W1", "B025_FSQAAZ0BZZS-01", "B025_FSQAAZ0C0YJ-01", "B025_FSQAAZ0C00P-07" )
samplesToKeep <- BiocGenerics::which(myArchRProj$Sample %in% samplesToKeep)
idxSample <- myArchRProj$cellNames[idxSample]
cellsSample <- myArchRProj[cellsSample, ] myArchRProj
These should be set according to your ArchR project and investigative question.
For more details on each of these parameters, view the help pages for
each function using ?callOpenTiles
and
?getSampleTileMatrix
# Parameters for calling open tiles.
<- "CellSubsets"
cellPopLabel <- c("MAIT", "CD16 Mono", "DC")
cellPopulations <- 20 numCores
Get sample-tile matrices for all specified cell populations.
<- MOCHA::callOpenTiles(
tileResults
myArchRProj, cellPopLabel = cellPopLabel,
cellPopulations = cellPopulations,
TxDb = TxDb,
Org = Org,
numCores = numCores
)
…for all cell populations.
These matrices are organized by cell population
RangedSummarizedExperiment object and are the primary input to
downstream analyses. An advantage of MOCHA’s ability to call
sample-specific open tiles is that we can determine a high-quality set
of”consensus tiles determined as follows: each sample “votes” on whether
a tile is open for that sample, and we keep tiles that are called open
by a minimum percentage of samples. The minimum percentage of samples
which a tile must be called in to be retained is controlled by
threshold
. groupColumn
can be provided to
specify a metadata column that separates your data by sample groups,
e.g. if you have a case and control groups. Consensus tiles will be
computed for each group, and the union of consensus tiles from each
group are retained. This should be used where there are expected
biological differences between the sample groups. Currently it is best
utilized when each group has a similar size, as threshold
will be applied to evenly each group. By default, groupColumn is null
and all samples will be pooled to vote on consensus tiles.
# We have 6 samples total: 3 samples for each COVID status (3 positive and 3 negative).
# Since these groupings may have unique biology and we expect differences
# in accessibility, we want to compute consensus tiles on each
# group independently and take the union of consensus tiles from each group.
<- "COVID_status"
groupColumn
# We set the threshold to require a tile must be open in at least 2 samples
# in its group to be retained (2/3=0.66)
<- 0.66
threshold
# Alternatively, you can set the threshold to 0 to keep the union of
# all samples' open tiles.
# This is equivalent to setting a threshold that would retain
# tiles that are open in at least one sample.
<- MOCHA::getSampleTileMatrix(
SampleTileMatrices
tileResults, cellPopulations = cellPopulations[1],
groupColumn = groupColumn,
threshold = threshold,
log2Intensity = TRUE
)
…and motifs to our SampleTileMatrices.
This info will aid further downstream analyses but is not required for differential accessibility nor coaccessibility.
# This function can also take any GRanges object
# and add annotations to its metadata.
<- MOCHA::annotateTiles(SampleTileMatrices)
SampleTileMatricesAnnotated
# Load a curated motif set from library(chromVARmotifs)
# included with ArchR installation
data(human_pwms_v2)
<- MOCHA::addMotifSet(
SampleTileMatricesAnnotated
SampleTileMatricesAnnotated, pwms = human_pwms_v2,
w = 7 # weight parameter for motifmatchr
)
Here we plot coverage at a specific region and gene by infection stage.
<- MOCHA::extractRegion(
countSE SampleTileObj = SampleTileMatrices,
cellPopulations = "CD16 Mono",
region = "chr3:38137866-38139912",
groupColumn = "COVID_status",
numCores = numCores,
sampleSpecific = FALSE
)dev.off()
pdf("ExamplePlot.pdf")
# Note that to show specific genes with the option' whichGene'
# you must have the package RMariaDB installed
::plotRegion(countSE = countSE, whichGene = "MYD88")
MOCHAdev.off()
Here we are comparing MAIT cells between samples where our groupColumn “COVID_status” is Positive (our foreground) to Negative samples (our background).
<- "MAIT"
cellPopulation <- "Positive"
foreground <- "Negative"
background
# Standard output will display the number of tiles found below a false-discovery rate threshold.
# This parameter does not filter results and only affects the afforementioned message.
<- 0.2
fdrToDisplay
# Choose to output a GRanges or data.frame.
# Default is TRUE
<- TRUE
outputGRanges
<- MOCHA::getDifferentialAccessibleTiles(
differentials SampleTileObj = SampleTileMatrices,
cellPopulation = cellPopulation,
groupColumn = groupColumn,
foreground = foreground,
background = background,
fdrToDisplay = fdrToDisplay,
outputGRanges = outputGRanges,
numCores = numCores
)
…between input regions (tiles) and their neighboring regions within a window.
Here we give the first ten differential tiles as our input regions.
<- head(differentials, 10)
regions
# Alternatively, define regions as a character vector
# of region strings in the format "chr:start-end"
# regions <- c(
# "chrY:7326500-7326999",
# "chrY:7327000-7327499",
# "chrY:7339500-7339999",
# "chrY:7344500-7344999"
# )
<- MOCHA::getCoAccessibleLinks(
links SampleTileObj = SampleTileMatrices,
cellPopulation = cellPopulation,
regions = regions,
windowSize = 1 * 10^6,
numCores = numCores,
verbose = TRUE
)
# Optionally filter these links by their absolute
# correlation - this output also adds the chromosome,
# start, and end site of each link to the table.
::filterCoAccessibleLinks(links, threshold = 0.7) MOCHA