DTU analysis using DRIMSeq

Load required libraries

library(data.table)
library(GenomicFeatures)
library(tximport)
library(DRIMSeq)
library(stageR)
library(dplyr)
library(reshape2)
library(ggplot2)
library(ggbeeswarm)

Import input data

Load clinical information and create a simple sample data.frame samps

dir = "../../"
sampleData = paste0(dir, "clinical.txt")
sampleData = fread(sampleData)
rownames(sampleData) = sampleData$ENA_RUN
sampleData$individual = as.factor(sampleData$individual)
sampleData$paris_classification = as.factor(sampleData$paris_classification)

# Use relevel() to set adjacent normal samples as reference
sampleData$paris_classification = relevel(sampleData$paris_classification, ref = "normal")
samps = data.frame(sample_id = sampleData$ENA_RUN, group = sampleData$paris_classification)

> samps
    sample_id  group
1  ERR2675454  0-IIa
2  ERR2675455 normal
3  ERR2675458  0-IIa
4  ERR2675459 normal
5  ERR2675460  0-IIa
6  ERR2675461 normal
7  ERR2675464  0-IIa
8  ERR2675465 normal
9  ERR2675468  0-IIa
10 ERR2675469 normal
11 ERR2675472  0-IIa
12 ERR2675473 normal
13 ERR2675476  0-IIa
14 ERR2675477 normal
15 ERR2675478  0-IIa
16 ERR2675479 normal
17 ERR2675480  0-IIa
18 ERR2675481 normal
19 ERR2675484  0-IIa
20 ERR2675485 normal

Load TxDB and construct a two column data.frame txdf with the transcript and gene identifiers

txdb.filename = "/home/USER/db/refanno/gencode.v33.annotation.sqlite"
txdb = loadDb(txdb.filename)

k = keys(txdb, keytype = "GENEID")
txdf = AnnotationDbi::select(txdb, k, "TXNAME", "GENEID")

> head(txdf)
              GENEID            TXNAME
1 ENSG00000000003.15 ENST00000373020.9
2 ENSG00000000003.15 ENST00000612152.4
3 ENSG00000000003.15 ENST00000614008.4
4 ENSG00000000003.15 ENST00000496771.5
5 ENSG00000000003.15 ENST00000494424.1
6  ENSG00000000005.6 ENST00000373031.5

Define file path and import Salmon NumReads values with tximport which contains the transcript-level counts (txIn = TRUE), abundances and average transcript lengths, then generate transcript-level estimated counts (txOut = TRUE) using the dtuScaledTPM method

files = file.path(paste0(dir, "salmon"), list.files(paste0(dir, "salmon")), "quant.sf")
names(files) = list.files(paste0(dir, "salmon"))

txi = tximport(files, type = "salmon", tx2gene = txdf[,2:1], 
txIn = TRUE, txOut = TRUE, countsFromAbundance = "dtuScaledTPM")

Buld a data.frame cts containing the estimated counts and remove rows that has zero counts across all samples

cts = txi$counts
cts = cts[rowSums(cts) > 0,]

The experiments have between 17.6 and 36.2 million paired-end reads that were mapped to the transcriptome using Salmon

> dim(cts)
[1] 159053     20

> cts[1:10,1:5]
                  ERR2675454 ERR2675455 ERR2675458 ERR2675459 ERR2675460
ENST00000456328.2   0.000000   0.000000   0.000000   0.000000   0.000000
ENST00000488147.1 226.501358 252.238109 204.124333 373.804412 284.304731
ENST00000469289.1   0.000000   1.657100   0.000000   0.000000   0.000000
ENST00000466430.5   3.501618   1.207668   6.291761   2.676865  11.539977
ENST00000477740.5   0.000000   0.000000   0.000000   0.000000   0.000000
ENST00000471248.1   0.000000   0.000000   0.000000   1.309356   1.373414
ENST00000610542.1   0.000000   0.000000   0.000000   0.000000   0.000000
ENST00000453576.2   0.000000   7.342963   0.000000   0.000000   0.000000
ENST00000495576.1  10.721771   0.000000   0.000000   0.000000   0.000000
ENST00000442987.3  58.965601   0.000000  34.048815   0.000000   0.000000

> range(colSums(cts)/1e6)
[1] 17.63626 36.28417

DRIMSeq procedure

Create a data.frame counts containing the gene_id (Gene ID) and feature_id (Transcript ID) information from subsetted txdf and estimated counts from cts

txdf.sub = txdf[match(rownames(cts),txdf$TXNAME),]
counts = data.frame(gene_id = txdf.sub$GENEID, feature_id = txdf.sub$TXNAME, cts)

> counts[1:5,1:5]
                            gene_id        feature_id ERR2675454 ERR2675455 ERR2675458
ENST00000456328.2 ENSG00000223972.5 ENST00000456328.2   0.000000   0.000000   0.000000
ENST00000488147.1 ENSG00000227232.5 ENST00000488147.1 226.501358 252.238109 204.124333
ENST00000469289.1 ENSG00000243485.5 ENST00000469289.1   0.000000   1.657100   0.000000
ENST00000466430.5 ENSG00000238009.6 ENST00000466430.5   3.501618   1.207668   6.291761
ENST00000477740.5 ENSG00000238009.6 ENST00000477740.5   0.000000   0.000000   0.000000

Then create a dmDSdata object d

d = dmDSdata(counts = counts, samples = samps)

> d
An object of class dmDSdata 
with 37333 genes and 20 samples
* data accessors: counts(), samples()

We will filter the object using dmFilter before running procedures to estimate model parameters. This speeds up the fitting and removes transcripts that may be troublesome for parameter estimation. We first define n to be the total number of samples, and n.small to be the sample size of the smallest group (in this dataset, both groups have 10 samples). All three of the possible filters are implemented.

A transcript has a count of at least 10 in at least n.small samples
A transcript has a relative abundance proportion of at least 0.1 in at least n.small samples
The total count of the corresponding gene is at least 10 in all n samples

n = nrow(samps)
n.small = min(table(samps$group))

d = dmFilter(d,
        min_samps_feature_expr = n.small, min_feature_expr = 10,
        min_samps_feature_prop = n.small, min_feature_prop = 0.1,
        min_samps_gene_expr = n, min_gene_expr = 10)

The number of genes in d reduced from 37333 to 7467. After filtering, the object d only contains genes that have more than one isoform, which makes sense as we are testing for differential transcript usage. We can find out how many of the remaining genes have N isoforms by tabulating the number of times we see a gene ID, then tabulating the output again

> d
An object of class dmDSdata 
with 7467 genes and 20 samples
* data accessors: counts(), samples()

> table(table(counts(d)$gene_id))
   2    3    4    5    6 
4281 2286  752  137   11

Create a design matrix, using a design formula and sample information from samps. We then use the following three functions to estimate the model parameters and test for DTU.

dmPrecision: estimate the precision (higher dispersion is associated with lower precision)
dmFit: fit regression coefficients
dmTest: perform null hypothesis testing on the coefficient of interest.

Here, we test the coefficient associated with the difference between group 0-IIa and normal

design = model.matrix(~group, data = samps)

set.seed(12345)
system.time({
        d <- dmPrecision(d, design = design)    # Estimate the precision (Higher dispersion is associated with lower precision)
        d <- dmFit(d, design = design)          # Fit regression coefficients
        d <- dmTest(d, coef = "group0-IIa")     # Perform null hypothesis testing on the coefficient of interest
})

The system.time was used to time the analysis and it takes about 15 minutes

! Using a subset of 0.1 genes to estimate common precision !
! Using common_precision = 4.7973 as prec_init !
! Using 1.150196 as a shrinkage factor !

   user  system elapsed 
900.063   8.770 862.508

Use results function to build test result tables. The res.g contains a single p-value per gene, which tests whether there is any differential transcript usage within the gene, and res.t contains a single p-value per transcript, which tests whether the proportions for this transcript changed within the gene

# Single p-value per gene
res.g = DRIMSeq::results(d)

# Single p-value per transcript
res.t = DRIMSeq::results(d, level = "feature")

Use is.na to check if the pvalue column contain NA values, in this case, there are none

> table(is.na(res.g$pvalue))
FALSE 
 7467 

> table(is.na(res.t$pvalue))
FALSE 
19179

We can use the following function no.na to turn p-values that are NA into 1 to avoid problems in down-stream analysis

no.na <- function(x) ifelse(is.na(x), 1, x) res$pvalue <- no.na(res$pvalue) res.txp$pvalue <- no.na(res.txp$pvalue)

DRIMSeq test identified 125 genes showing evidence of isoform switching involving 199 transcripts

> table(res.g$adj_pvalue < 0.05)
FALSE  TRUE 
 7342   125 
 
> table(res.t$adj_pvalue < 0.05)
FALSE  TRUE 
18980   199

Post-hoc filtering on the standard deviation in proportions

This post-hoc filtering step is not part of the DRIMSeq modeling steps but suggested in Michael Love's DTU workflow to improved the false discovery rate (FDR) and overall false discovery rate (OFDR) control

The smallProportionSD set the p-values and adjusted p-values for transcripts with small per-sample proportion SD to 1

smallProportionSD <- function(d, filter = 0.1) {
        # Generate count table
        cts = as.matrix(subset(counts(d), select = -c(gene_id, feature_id)))
        # Summarise count total per gene
        gene.cts = rowsum(cts, counts(d)$gene_id)
        # Use total count per gene as count per transcript
        total.cts = gene.cts[match(counts(d)$gene_id, rownames(gene.cts)),]
        # Calculate proportion of transcript in gene
        props = cts/total.cts
        rownames(props) = rownames(total.cts)
        
        # Calculate standard deviation
        propSD = sqrt(rowVars(props))
        # Check if standard deviation of per-sample proportions is < 0.1
        propSD < filter
}

filt = smallProportionSD(d)

res.t.filt = DRIMSeq::results(d, level = "feature")
res.t.filt$pvalue[filt] = 1
res.t.filt$adj_pvalue[filt] = 1

Of the 19179 transcripts, 6504 have small per-sample proportion

> table(filt)
filt
FALSE  TRUE 
12290  6504

There are 199 transcripts found to be involved in switching before filtering and 193 after filtering

> table(res.t$adj_pvalue < 0.05)
FALSE  TRUE 
18980   199 

> table(res.t.filt$adj_pvalue < 0.05)
FALSE  TRUE 
18986   193

stageR procedure

Set up a function to strip away the version numbers in the Ensembl gene and transcript IDs

strp <- function(x) substr(x,1,15)

Construct a vector of per-gene p-values for the screening stage

pScreen = res.g$pvalue
names(pScreen) = strp(res.g$gene_id)

> head(pScreen,10)
ENSG00000000003 ENSG00000000457 ENSG00000000460 ENSG00000000971 ENSG00000001084 
   4.345435e-01    2.009637e-01    5.436509e-03    8.602669e-01    8.098329e-05 
ENSG00000001167 ENSG00000001461 ENSG00000001497 ENSG00000001617 ENSG00000001626 
   9.989143e-01    2.201689e-01    8.968703e-02    9.702887e-01    9.714687e-01

Construct a one column matrix of the per-transcript confirmation p-values

pConfirmation = matrix(res.t.filt$pvalue, ncol = 1)
dimnames(pConfirmation) = list(strp(res.t.filt$feature_id), "transcript")

> head(pConfirmation)
                transcript
ENST00000373020  1.0000000
ENST00000614008  1.0000000
ENST00000367771  0.2009637
ENST00000367772  0.2009637
ENST00000359326  0.1681221
ENST00000496973  0.2013211

The res.t is used twice to construct a 4-column data.frame that contain both original IDs and IDs without version numbers

tx2gene = data.frame(res.t[,c("feature_id", "gene_id")], 
res.t[,c("feature_id", "gene_id")])

for (i in 1:2) tx2gene[,i] = strp(tx2gene[,i])

> head(tx2gene)
       feature_id         gene_id       feature_id.1          gene_id.1
1 ENST00000373020 ENSG00000000003  ENST00000373020.9 ENSG00000000003.15
2 ENST00000614008 ENSG00000000003  ENST00000614008.4 ENSG00000000003.15
3 ENST00000367771 ENSG00000000457 ENST00000367771.11 ENSG00000000457.14
4 ENST00000367772 ENSG00000000457  ENST00000367772.8 ENSG00000000457.14
5 ENST00000359326 ENSG00000000460  ENST00000359326.9 ENSG00000000460.17
6 ENST00000496973 ENSG00000000460  ENST00000496973.5 ENSG00000000460.17

Construct a stageRTx object and perform the stageR analysis. We specify alpha to be 0.05

stageRObj = stageRTx(pScreen = pScreen, 
pConfirmation = pConfirmation, 
pScreenAdjusted = FALSE, 
tx2gene = tx2gene[,1:2])

stageRObj = stageWiseAdjustment(stageRObj, method = "dtu", alpha = 0.05)

drim.padj = getAdjustedPValues(stageRObj, order = FALSE, onlySignificantGenes = TRUE)
drim.padj = merge(tx2gene, drim.padj, by.x = c("gene_id","feature_id"), by.y = c("geneID","txID"))

> head(drim.padj)
          gene_id      feature_id      feature_id.1          gene_id.1        gene transcript
1 ENSG00000001084 ENST00000616923 ENST00000616923.5 ENSG00000001084.13 0.008398642 0.00000000
2 ENSG00000001084 ENST00000650454 ENST00000650454.1 ENSG00000001084.13 0.008398642 0.00000000
3 ENSG00000006704 ENST00000265755 ENST00000265755.7 ENSG00000006704.10 0.007517145 1.00000000
4 ENSG00000006704 ENST00000424337 ENST00000424337.6 ENSG00000006704.10 0.007517145 1.00000000
5 ENSG00000006704 ENST00000470715 ENST00000470715.1 ENSG00000006704.10 0.007517145 0.04782923
6 ENSG00000006704 ENST00000486086 ENST00000486086.1 ENSG00000006704.10 0.007517145 1.00000000

The data.frame drim.padj summarizes the adjusted p-values from the two-stage analysis. Only genes that passed the filter are included in the table. There are 125 screened genes in this dataset, and 187 transcripts pass the confirmation stage on a target 5% overall false discovery rate (OFDR). This means that, in expectation, no more than 5% of the genes that pass screening will either (1) not contain any DTU, so be falsely screened genes, or (2) contain a falsely confirmed transcript.

> length(unique(drim.padj[drim.padj$gene < 0.05,]$gene_id))
[1] 125

> table(drim.padj$transcript < 0.05)
FALSE  TRUE 
  142   187

Exporting results

We construct a drimData data.frame to store per-group average proportion and per-sample proportion of transcripts, and a drimDTU data.frame to store the DRIMSeq+stageR results. We merge both data.frame with a annoData data.frame that contain per-transcript annotation derived from GENCODE GTF file before exporting the results in tabular format

annoData = "/home/USER/db/refanno/gencode.v33.annotation_transcripts.txt"
annoData = data.frame(fread(annoData))

# Create a data.frame containing counts in long-format data with reshape2::melt
drim.prop = reshape2::melt(counts[counts$feature_id %in% proportions(d)$feature_id,], id = c("gene_id", "feature_id"))
drim.prop = drim.prop[order(drim.prop$gene_id, drim.prop$variable, drim.prop$feature_id),]

# Calculate proportions from counts
system.time({
drim.prop = drim.prop %>%
        group_by(gene_id, variable) %>%
        mutate(total = sum(value)) %>%
        group_by(variable, add=TRUE) %>%
        mutate(prop = value/total)
})

# Convert the data.frame to wide-format data with reshape2::dcast
drim.prop = reshape2::dcast(drim.prop[,c(1,2,3,6)], gene_id + feature_id ~ variable)

> drim.prop[1:10, 1:5]
              gene_id         feature_id ERR2675454 ERR2675455 ERR2675458
1  ENSG00000000003.15  ENST00000373020.9  0.7097836 0.93399424  0.8859886
2  ENSG00000000003.15  ENST00000614008.4  0.2902164 0.06600576  0.1140114
3  ENSG00000000457.14 ENST00000367771.11  0.4028474 0.33172103  0.5226108
4  ENSG00000000457.14  ENST00000367772.8  0.5971526 0.66827897  0.4773892
5  ENSG00000000460.17  ENST00000359326.9  0.4202506 0.33217442  0.4042091
6  ENSG00000000460.17  ENST00000413811.3  0.3384826 0.51180314  0.3758459
7  ENSG00000000460.17  ENST00000459772.5  0.0000000 0.00000000  0.0000000
8  ENSG00000000460.17  ENST00000496973.5  0.2412668 0.15602244  0.2199450
9  ENSG00000000971.16  ENST00000367429.9  0.8762398 0.70118186  0.8358671
10 ENSG00000000971.16  ENST00000630130.2  0.1237602 0.29881814  0.1641329

# Average proportions calculated from fitted proportions
drim.mean = as.data.frame(sapply( levels(samps$group), 
function(lvl) rowMeans(proportions(d)[, 3:ncol(proportions(d))][, samps$group == lvl, drop = FALSE]) ))

# log2 fold change in proportions
drim.log2fcp = log2(drim.mean[2]/drim.mean[1])
colnames(drim.log2fcp) = "log2fcp"
rownames(drim.log2fcp) = proportions(d)$feature_id

# Merge to create result data
drimData = cbind(drim.prop[,1:2], drim.mean, drim.prop[, 3:ncol(drim.prop)])
drimData = merge(annoData, drimData, by.x = c("GeneID","TranscriptID"), by.y = c("gene_id","feature_id"))
drimData = drimData[order(drimData$GeneID, drimData$TranscriptID),]

> head(drim.mean)
     normal     0-IIa
1 0.8339570 0.7913571
2 0.1660430 0.2086429
3 0.5644001 0.4429697
4 0.4355999 0.5570303
5 0.1443673 0.2225594
6 0.1683514 0.2337224

> head(drim.log2fcp)
                       log2fcp
ENST00000373020.9  -0.07564415
ENST00000614008.4   0.32947905
ENST00000367771.11 -0.34951022
ENST00000367772.8   0.35475215
ENST00000359326.9   0.62444682
ENST00000496973.5   0.47332007

> drimData[1:10,1:16]
               GeneID       TranscriptID GeneSymbol Chromosome     Start       End TranscriptName                   Class Strand Length    normal      0-IIa ERR2675454 ERR2675455 ERR2675458 ERR2675459
1  ENSG00000000003.15  ENST00000373020.9     TSPAN6       chrX 100627108 100636806     TSPAN6-201          protein_coding      -   9698 0.8339570 0.79135714  0.7097836 0.93399424  0.8859886 0.93240170
2  ENSG00000000003.15  ENST00000614008.4     TSPAN6       chrX 100632063 100637104     TSPAN6-205          protein_coding      -   5041 0.1660430 0.20864286  0.2902164 0.06600576  0.1140114 0.06759830
3  ENSG00000000457.14 ENST00000367771.11      SCYL3       chr1 169849631 169893896      SCYL3-202          protein_coding      -  44265 0.5644001 0.44296969  0.4028474 0.33172103  0.5226108 0.44438879
4  ENSG00000000457.14  ENST00000367772.8      SCYL3       chr1 169853074 169893959      SCYL3-203          protein_coding      -  40885 0.4355999 0.55703031  0.5971526 0.66827897  0.4773892 0.55561121
5  ENSG00000000460.17  ENST00000359326.9   C1orf112       chr1 169795040 169854080   C1orf112-202          protein_coding      +  59040 0.1443673 0.22255940  0.4202506 0.33217442  0.4042091 0.00000000
6  ENSG00000000460.17  ENST00000413811.3   C1orf112       chr1 169795921 169853037   C1orf112-203          protein_coding      +  57116 0.1683514 0.23372236  0.3384826 0.51180314  0.3758459 0.37564128
7  ENSG00000000460.17  ENST00000459772.5   C1orf112       chr1 169795079 169854080   C1orf112-204 nonsense_mediated_decay      +  59001 0.3032860 0.05131806  0.0000000 0.00000000  0.0000000 0.56039759
8  ENSG00000000460.17  ENST00000496973.5   C1orf112       chr1 169795043 169804347   C1orf112-208          protein_coding      +   9304 0.3839954 0.49240018  0.2412668 0.15602244  0.2199450 0.06396113
9  ENSG00000000971.16  ENST00000367429.9        CFH       chr1 196652043 196747504        CFH-202          protein_coding      +  95461 0.7929910 0.78007665  0.8762398 0.70118186  0.8358671 0.58161454
10 ENSG00000000971.16  ENST00000630130.2        CFH       chr1 196652045 196701566        CFH-206          protein_coding      +  49521 0.2070090 0.21992335  0.1237602 0.29881814  0.1641329 0.41838546

# Merge to create result data
drimDTU = merge(drim.padj[,c("feature_id.1","gene_id.1","gene","transcript")], drim.log2fcp, by.x = "feature_id.1", by.y = "row.names")
drimDTU = merge(annoData, drimDTU, by.x = c("GeneID","TranscriptID"), by.y = c("gene_id.1", "feature_id.1"))
drimDTU = drimDTU[order(drimDTU$GeneID, drimDTU$TranscriptID),]

> head(drimDTU)
              GeneID      TranscriptID GeneSymbol Chromosome    Start      End TranscriptName           Class Strand Length        gene transcript    log2fcp
1 ENSG00000001084.13 ENST00000616923.5       GCLC       chr6 53497384 53548206       GCLC-212  protein_coding      -  50822 0.008398642 0.00000000  2.3831147
2 ENSG00000001084.13 ENST00000650454.1       GCLC       chr6 53497341 53545101       GCLC-214  protein_coding      -  47760 0.008398642 0.00000000 -0.4601369
3 ENSG00000006704.10 ENST00000265755.7   GTF2IRD1       chr7 74453790 74602590   GTF2IRD1-201  protein_coding      + 148800 0.007517145 1.00000000  1.0056618
4 ENSG00000006704.10 ENST00000424337.6   GTF2IRD1       chr7 74454109 74602601   GTF2IRD1-202  protein_coding      + 148492 0.007517145 1.00000000 -0.5964554
5 ENSG00000006704.10 ENST00000470715.1   GTF2IRD1       chr7 74545762 74601106   GTF2IRD1-204  protein_coding      +  55344 0.007517145 0.04782923  2.5227831
6 ENSG00000006704.10 ENST00000486086.1   GTF2IRD1       chr7 74600960 74602583   GTF2IRD1-206 retained_intron      +   1623 0.007517145 1.00000000 -1.1868560

Write results to files

write.table(drimData, file="DTU_DRIMSeq-stageR_means_and_proportions.txt", sep = "\t", quote = F, row.names = F, col.names = T)
write.table(drimDTU, file="DTU_DRIMSeq-stageR_results.txt", sep = "\t", quote = F, row.names = F, col.names = T)

Exploring results

We can use the DRIMSeq's plotProportions to plot the transcript proportions of a given gene. In the example below, we select a gene from the data.frame drim.padj that has smallest transcript and gene adjusted p-values

# Plot the estimated proportions for one of the significant genes, where we can see evidence of switching
gene_id = unique(drim.padj[order(drim.padj$transcript, drim.padj$gene),]$gene_id.1)[1]

png("plotProportions.DRIMSeq-stageR.1.png", width=6, height=6, units = "in", res = 300)
plotProportions(d, gene_id = gene_id, 
group_variable = "group", plot_type = "boxplot1")
dev.off()

We can also use ggplot2 to plot similar figure with data from drim.prop

# We will use this function in both DRIMSeq & DEXSeq workflows
plotExpression <- function(expData = NULL, geneID = NULL, samps = NULL, isProportion = FALSE) {
        colnames(expData)[1:2] = c("gid","tid")
        sub = subset(expData, gid == geneID)
        sub = reshape2::melt(sub, id = c("gid", "tid"))
        sub = merge(samps, sub, by.x = "sample_id", by.y = "variable")
        if(!isProportion) {
                sub$value = log(sub$value)
        }

        clrs = c("dodgerblue3", "maroon2",  "forestgreen", "darkorange1", "blueviolet", "firebrick2",
"deepskyblue", "orchid2", "chartreuse3", "gold", "slateblue1", "tomato" , "blue", "magenta", "green3",
"yellow", "purple3", "red" ,"darkslategray1", "lightpink1", "lightgreen", "khaki1", "plum3", "salmon")

        p = ggplot(sub, aes(tid, value, color = group, fill = group)) +
        geom_boxplot(alpha = 0.4, outlier.shape = NA, width = 0.8, lwd = 0.5) +
        stat_summary(fun = mean, geom = "point", color = "black", shape = 5, size = 3, position=position_dodge(width = 0.8)) +
        scale_color_manual(values = clrs) + scale_fill_manual(values = clrs) +
        geom_quasirandom(color = "black", size = 1, dodge.width = 0.8) + theme_bw() +
        ggtitle(geneID) + xlab("Transcripts")

        if(!isProportion) {
                p = p + ylab("log(Expression)")
        } else {
                p = p + ylab("Proportions")
        }
        p
}

gene_id = unique(drim.padj[order(drim.padj$transcript, drim.padj$gene),]$gene_id.1)[1]
png("plotProportions.DRIMSeq-stageR.2.png", width=6, height=6, units = "in", res = 300)
plotExpression(drim.prop, gene_id, samps, isProportion = TRUE)
dev.off()

Session info

> sessionInfo()
R version 3.6.3 (2020-02-29)
Platform: x86_64-conda_cos6-linux-gnu (64-bit)
Running under: Ubuntu 18.04.4 LTS

Matrix products: default
BLAS/LAPACK: /home/USER/miniconda3/lib/libopenblasp-r0.3.9.so

locale:
 [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
 [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
 [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats4    parallel  stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] ggbeeswarm_0.6.0            ggplot2_3.3.0              
 [3] reshape2_1.4.4              dplyr_0.8.5                
 [5] stageR_1.8.0                SummarizedExperiment_1.16.1
 [7] DelayedArray_0.12.3         BiocParallel_1.20.1        
 [9] matrixStats_0.56.0          DRIMSeq_1.14.0             
[11] tximport_1.14.2             GenomicFeatures_1.38.2     
[13] AnnotationDbi_1.48.0        Biobase_2.46.0             
[15] GenomicRanges_1.38.0        GenomeInfoDb_1.22.1        
[17] IRanges_2.20.2              S4Vectors_0.24.4           
[19] BiocGenerics_0.32.0         data.table_1.12.8          

loaded via a namespace (and not attached):
 [1] httr_1.4.1               edgeR_3.28.1             jsonlite_1.6.1          
 [4] bit64_0.9-7              assertthat_0.2.1         askpass_1.1             
 [7] BiocFileCache_1.10.2     blob_1.2.1               vipor_0.4.5             
[10] GenomeInfoDbData_1.2.2   Rsamtools_2.2.3          progress_1.2.2          
[13] pillar_1.4.4             RSQLite_2.2.0            lattice_0.20-41         
[16] glue_1.4.0               limma_3.42.2             digest_0.6.25           
[19] XVector_0.26.0           colorspace_1.4-1         Matrix_1.2-18           
[22] plyr_1.8.6               XML_3.99-0.3             pkgconfig_2.0.3         
[25] biomaRt_2.42.1           zlibbioc_1.32.0          purrr_0.3.4             
[28] scales_1.1.0             tibble_3.0.1             openssl_1.4.1           
[31] farver_2.0.3             ellipsis_0.3.0           withr_2.2.0             
[34] magrittr_1.5             crayon_1.3.4             memoise_1.1.0           
[37] beeswarm_0.2.3           tools_3.6.3              prettyunits_1.1.1       
[40] hms_0.5.3                lifecycle_0.2.0          stringr_1.4.0           
[43] munsell_0.5.0            locfit_1.5-9.4           Biostrings_2.54.0       
[46] compiler_3.6.3           rlang_0.4.6              grid_3.6.3              
[49] RCurl_1.98-1.2           rappdirs_0.3.1           labeling_0.3            
[52] tcltk_3.6.3              bitops_1.0-6             gtable_0.3.0            
[55] DBI_1.1.0                curl_4.3                 R6_2.4.1                
[58] GenomicAlignments_1.22.1 rtracklayer_1.46.0       bit_1.1-15.2            
[61] readr_1.3.1              stringi_1.4.6            Rcpp_1.0.4.6            
[64] vctrs_0.2.4              dbplyr_1.4.3             tidyselect_1.0.0

PreviousDifferential transcript usage (DTU) analysis NextDTU analysis using DEXSeq

Last updated 5 years ago

Was this helpful?