DTU analysis using DEXSeq

Load required libraries

library(data.table)
library(GenomicFeatures)
library(tximport)
library(DEXSeq)
library(stageR)
library(reshape2)
library(ggplot2)
library(ggbeeswarm)

Import input data

Load clinical information and create a simple sample data.frame samps

dir = "../../"
sampleData = paste0(dir, "clinical.txt")
sampleData = fread(sampleData)
rownames(sampleData) = sampleData$ENA_RUN
sampleData$individual = as.factor(sampleData$individual)
sampleData$paris_classification = as.factor(sampleData$paris_classification)

# Use relevel() to set adjacent normal samples as reference
sampleData$paris_classification = relevel(sampleData$paris_classification, ref = "normal")
samps = data.frame(sample_id = sampleData$ENA_RUN, group = sampleData$paris_classification)

> samps
    sample_id  group
1  ERR2675454  0-IIa
2  ERR2675455 normal
3  ERR2675458  0-IIa
4  ERR2675459 normal
5  ERR2675460  0-IIa
6  ERR2675461 normal
7  ERR2675464  0-IIa
8  ERR2675465 normal
9  ERR2675468  0-IIa
10 ERR2675469 normal
11 ERR2675472  0-IIa
12 ERR2675473 normal
13 ERR2675476  0-IIa
14 ERR2675477 normal
15 ERR2675478  0-IIa
16 ERR2675479 normal
17 ERR2675480  0-IIa
18 ERR2675481 normal
19 ERR2675484  0-IIa
20 ERR2675485 normal

Load TxDB and construct a two column data.frame txdf with the transcript and gene identifiers

txdb.filename = "/home/USER/db/refanno/gencode.v33.annotation.sqlite"
txdb = loadDb(txdb.filename)

k = keys(txdb, keytype = "GENEID")
txdf = AnnotationDbi::select(txdb, k, "TXNAME", "GENEID")

> head(txdf)
              GENEID            TXNAME
1 ENSG00000000003.15 ENST00000373020.9
2 ENSG00000000003.15 ENST00000612152.4
3 ENSG00000000003.15 ENST00000614008.4
4 ENSG00000000003.15 ENST00000496771.5
5 ENSG00000000003.15 ENST00000494424.1
6  ENSG00000000005.6 ENST00000373031.5

Define file path and import Salmon NumReads values with tximport which contains the transcript-level counts (txIn = TRUE), abundances and average transcript lengths, then generate transcript-level estimated counts (txOut = TRUE) using the dtuScaledTPM method

files = file.path(paste0(dir, "salmon"), list.files(paste0(dir, "salmon")), "quant.sf")
names(files) = list.files(paste0(dir, "salmon"))

txi = tximport(files, type = "salmon", tx2gene = txdf[,2:1], 
txIn = TRUE, txOut = TRUE, countsFromAbundance = "dtuScaledTPM")

Buld a data.frame cts containing the estimated counts and remove rows that has zero counts across all samples

cts = txi$counts
cts = cts[rowSums(cts) > 0,]

The experiments have between 17.6 and 36.2 million paired-end reads that were mapped to the transcriptome using Salmon

> dim(cts)
[1] 159053     20

> cts[1:10,1:5]
                  ERR2675454 ERR2675455 ERR2675458 ERR2675459 ERR2675460
ENST00000456328.2   0.000000   0.000000   0.000000   0.000000   0.000000
ENST00000488147.1 226.501358 252.238109 204.124333 373.804412 284.304731
ENST00000469289.1   0.000000   1.657100   0.000000   0.000000   0.000000
ENST00000466430.5   3.501618   1.207668   6.291761   2.676865  11.539977
ENST00000477740.5   0.000000   0.000000   0.000000   0.000000   0.000000
ENST00000471248.1   0.000000   0.000000   0.000000   1.309356   1.373414
ENST00000610542.1   0.000000   0.000000   0.000000   0.000000   0.000000
ENST00000453576.2   0.000000   7.342963   0.000000   0.000000   0.000000
ENST00000495576.1  10.721771   0.000000   0.000000   0.000000   0.000000
ENST00000442987.3  58.965601   0.000000  34.048815   0.000000   0.000000

> range(colSums(cts)/1e6)
[1] 17.63626 36.28417

Pre-filter with DRIMSeq dmFilter

We will make use of the dmFilter in DRIMSeq to remove genes which had expression too low to have very much statistical power for detecting DTU, and transcripts which were very lowly expressed in both conditions, and so not contributing useful information for DTU

Create a data.frame counts containing the gene_id (Gene ID) and feature_id (Transcript ID) information from subsetted txdf and estimated counts from cts

txdf.sub = txdf[match(rownames(cts),txdf$TXNAME),]
counts = data.frame(gene_id = txdf.sub$GENEID, feature_id = txdf.sub$TXNAME, cts)

Then create a dmDSdata object d

d = DRIMSeq::dmDSdata(counts = counts, samples = samps)

> d
An object of class dmDSdata 
with 37333 genes and 20 samples
* data accessors: counts(), samples()

We will filter the object using dmFilter before running DEXSeq workflow

n = nrow(samps)
n.small = min(table(samps$group))

d = DRIMSeq::dmFilter(d,
        min_samps_feature_expr = n.small, min_feature_expr = 10,
        min_samps_feature_prop = n.small, min_feature_prop = 0.1,
        min_samps_gene_expr = n, min_gene_expr = 10)

> d
An object of class dmDSdata 
with 7467 genes and 20 samples
* data accessors: counts(), samples()

DEXSeq procedure

Create a count table from the filtered d and build a DEXSeqDataSet from countData, providing the sample information, a design formula, transcript ID and gene ID

countData = round(as.matrix(counts(d)[,-c(1:2)]))

dxd = DEXSeqDataSet(countData = countData, sampleData = samps, 
design = ~sample + exon + group:exon, 
featureID = counts(d)$feature_id, groupID = counts(d)$gene_id)

> countData[1:10, 1:6]
      ERR2675454 ERR2675455 ERR2675458 ERR2675459 ERR2675460 ERR2675461
 [1,]        468        650        324        616        321        327
 [2,]        191         46         42         45         39         83
 [3,]        195        150        180        192        144        120
 [4,]        290        302        164        240        136         45
 [5,]        134         62        103          0         37         85
 [6,]         77         29         56          6         19          0
 [7,]          0          0          0         53          0         31
 [8,]        108         96         96         35         70          0
 [9,]        458        140        359         81        135        143
[10,]         65         60         70         58         88          0

> dxd
class: DEXSeqDataSet 
dim: 19179 40 
metadata(1): version
assays(1): counts
rownames(19179): ENSG00000000003.15:ENST00000373020.9 ENSG00000000003.15:ENST00000614008.4 ... ENSG00000287778.1:ENST00000670155.1
  ENSG00000287778.1:ENST00000656610.1
rowData names(4): featureID groupID exonBaseMean exonBaseVar
colnames: NULL
colData names(4): sample sample_id group exon

Performs DEXSeq analysis using the following 3 functions: (1) estimation of size factors, (2) estimation of dispersion, then (3) perform a likelihood ratio test for differential exon usage (in this case, transcripts). The results tables (log2 fold changes and p-values) can be generated using the DEXSeqResults function

system.time({
        dxd = estimateSizeFactors(dxd)
        dxd = estimateDispersions(dxd)
        dxd = testForDEU(dxd, reducedModel = ~sample + exon)
})

dxr = DEXSeqResults(dxd, independentFiltering = FALSE)

> head(dxr)

LRT p-value: full vs reduced

DataFrame with 6 rows and 9 columns
                                                 groupID          featureID     exonBaseMean        dispersion              stat            pvalue
                                             <character>        <character>        <numeric>         <numeric>         <numeric>         <numeric>
ENSG00000000003.15:ENST00000373020.9  ENSG00000000003.15  ENST00000373020.9 474.457021737822 0.222303618613562  1.29107419859525  0.25585008389399
ENSG00000000003.15:ENST00000614008.4  ENSG00000000003.15  ENST00000614008.4 93.3828179421105 0.223583702134135  1.28408521927088 0.257140837485974
ENSG00000000457.14:ENST00000367771.11 ENSG00000000457.14 ENST00000367771.11  148.93020737817  0.48925966082589 0.448648620344443 0.502977402562165
ENSG00000000457.14:ENST00000367772.8  ENSG00000000457.14  ENST00000367772.8 190.762299165088  0.48880392279864 0.449052372082861 0.502785311576765
ENSG00000000460.17:ENST00000359326.9  ENSG00000000460.17  ENST00000359326.9 38.9549471552993  1.55960758819467   1.3332300593911 0.248231398819161
ENSG00000000460.17:ENST00000496973.5  ENSG00000000460.17  ENST00000496973.5 27.1949185511083  0.77408695259049  1.58680806617588 0.207782765521626
                                                   padj   genomicData       countData
                                              <numeric> <GRangesList>        <matrix>
ENSG00000000003.15:ENST00000373020.9  0.912299270122705               468:650:324:...
ENSG00000000003.15:ENST00000614008.4  0.912299270122705                 191:46:42:...
ENSG00000000457.14:ENST00000367771.11  0.98889530491025               195:150:180:...
ENSG00000000457.14:ENST00000367772.8   0.98889530491025               290:302:164:...
ENSG00000000460.17:ENST00000359326.9  0.905721245074065                134:62:103:...
ENSG00000000460.17:ENST00000496973.5  0.874404284201261                  77:29:56:...

Compute per-gene adjusted p-value using perGeneQValue, which aggregates evidence from multiple tests within a gene to a single p-value for the gene and then corrects for multiple testing across genes. Construct gene-level and transcript-level statistics

qval = perGeneQValue(dxr)
dxr.g = data.frame(gene = names(qval), qval)
dxr.t = as.data.frame(dxr[, c("featureID","groupID","pvalue")])

> dim(dxr.g)
[1] 7467    2

> head(dxr.g)
                                 gene      qval
ENSG00000000003.15 ENSG00000000003.15 1.0000000
ENSG00000000457.14 ENSG00000000457.14 1.0000000
ENSG00000000460.17 ENSG00000000460.17 0.4824143
ENSG00000000971.16 ENSG00000000971.16 1.0000000
ENSG00000001084.13 ENSG00000001084.13 1.0000000
ENSG00000001167.14 ENSG00000001167.14 1.0000000

> dim(dxr.t)
[1] 19179     3

> head(dxr.t)
                                               featureID            groupID    pvalue
ENSG00000000003.15:ENST00000373020.9   ENST00000373020.9 ENSG00000000003.15 0.2558501
ENSG00000000003.15:ENST00000614008.4   ENST00000614008.4 ENSG00000000003.15 0.2571408
ENSG00000000457.14:ENST00000367771.11 ENST00000367771.11 ENSG00000000457.14 0.5029774
ENSG00000000457.14:ENST00000367772.8   ENST00000367772.8 ENSG00000000457.14 0.5027853
ENSG00000000460.17:ENST00000359326.9   ENST00000359326.9 ENSG00000000460.17 0.2482314
ENSG00000000460.17:ENST00000496973.5   ENST00000496973.5 ENSG00000000460.17 0.2077828

DEXSeq test identified 92 genes showing evidence of isoform switching involving 186 transcripts

> dim(dxr.g[dxr.g$qval < 0.05,])
[1] 92  2

> dim(dxr[dxr$padj < 0.05,])
[1] 186   9

stageR procedure

Set up a function to strip away the version numbers in the Ensembl gene and transcript IDs

strp <- function(x) substr(x,1,15)

Construct a vector of per-gene p-values for the screening stage. Per-gene adjusted p-value qval is used here

pScreen = qval
names(pScreen) = strp(names(pScreen))

> head(pScreen,10)
ENSG00000000003 ENSG00000000457 ENSG00000000460 ENSG00000000971 ENSG00000001084 
      1.0000000       1.0000000       0.4824143       1.0000000       1.0000000 
ENSG00000001167 ENSG00000001461 ENSG00000001497 ENSG00000001617 ENSG00000001626 
      1.0000000       0.7230727       0.1993463       1.0000000       1.0000000

Construct a one column matrix of the per-transcript confirmation p-values

pConfirmation = matrix(dxr.t$pvalue, ncol=1)
dimnames(pConfirmation) = list(strp(dxr.t$featureID),"transcript")

> head(pConfirmation)
                transcript
ENST00000373020  0.2558501
ENST00000614008  0.2571408
ENST00000367771  0.5029774
ENST00000367772  0.5027853
ENST00000359326  0.2482314
ENST00000496973  0.2077828

The dxr.t is used twice to construct a 4-column data.frame that contain both original IDs and IDs without version numbers

tx2gene = data.frame(dxr.t[,c("featureID", "groupID")], 
dxr.t[,c("featureID", "groupID")])
for (i in 1:2) tx2gene[,i] = strp(tx2gene[,i])

> head(tx2gene)
                                            featureID         groupID        featureID.1          groupID.1
ENSG00000000003.15:ENST00000373020.9  ENST00000373020 ENSG00000000003  ENST00000373020.9 ENSG00000000003.15
ENSG00000000003.15:ENST00000614008.4  ENST00000614008 ENSG00000000003  ENST00000614008.4 ENSG00000000003.15
ENSG00000000457.14:ENST00000367771.11 ENST00000367771 ENSG00000000457 ENST00000367771.11 ENSG00000000457.14
ENSG00000000457.14:ENST00000367772.8  ENST00000367772 ENSG00000000457  ENST00000367772.8 ENSG00000000457.14
ENSG00000000460.17:ENST00000359326.9  ENST00000359326 ENSG00000000460  ENST00000359326.9 ENSG00000000460.17
ENSG00000000460.17:ENST00000496973.5  ENST00000496973 ENSG00000000460  ENST00000496973.5 ENSG00000000460.17

Construct a stageRTx object and perform the stageR analysis. We specify alpha to be 0.05

# qval used in pScreen, hence pScreenAdjusted=TRUE
stageRObj = stageRTx(pScreen = pScreen, 
pConfirmation = pConfirmation, 
pScreenAdjusted = TRUE, 
tx2gene = tx2gene[1:2])

stageRObj = stageWiseAdjustment(stageRObj, method = "dtu", alpha = 0.05)

dex.padj = getAdjustedPValues(stageRObj, order = FALSE, onlySignificantGenes = TRUE)
dex.padj = merge(tx2gene, dex.padj, by.x = c("groupID","featureID"), by.y = c("geneID","txID"))

> head(dex.padj)
          groupID       featureID        featureID.1          groupID.1        gene  transcript
1 ENSG00000010295 ENST00000336604  ENST00000336604.8 ENSG00000010295.19 0.005950696 0.002316827
2 ENSG00000010295 ENST00000356896  ENST00000356896.8 ENSG00000010295.19 0.005950696 0.019401970
3 ENSG00000010295 ENST00000396830  ENST00000396830.2 ENSG00000010295.19 0.005950696 1.000000000
4 ENSG00000010295 ENST00000488007  ENST00000488007.5 ENSG00000010295.19 0.005950696 1.000000000
5 ENSG00000010803 ENST00000326197 ENST00000326197.11 ENSG00000010803.16 0.002025851 1.000000000
6 ENSG00000010803 ENST00000337495  ENST00000337495.9 ENSG00000010803.16 0.002025851 1.000000000

The data.frame dex.padj summarizes the adjusted p-values from the two-stage analysis. Only genes that passed the filter are included in the table. There are 92 screened genes in this dataset, and 149 transcripts pass the confirmation stage on a target 5% overall false discovery rate (OFDR). This means that, in expectation, no more than 5% of the genes that pass screening will either (1) not contain any DTU, so be falsely screened genes, or (2) contain a falsely confirmed transcript.

> length(unique(dex.padj[dex.padj$gene < 0.05,]$groupID))
[1] 92

> table(dex.padj$transcript < 0.05)
FALSE  TRUE 
   92   149

Exporting results

We construct a dexData data.frame to store per-group normalised mean and normalised counts of all samples, and a dexDTU data.frame to store the DEXSeq+stageR results. We merge both data.frame with a annoData data.frame that contain per-transcript annotation derived from GENCODE GTF file before exporting the results in tabular format

annoData = "/home/USER/db/refanno/gencode.v33.annotation_transcripts.txt"
annoData = data.frame(fread(annoData))

dex.norm = cbind(as.data.frame(stringr::str_split_fixed(rownames(counts(dxd)), ":", 2)), as.data.frame(counts(dxd, normalized = TRUE))[,1:20])
colnames(dex.norm) = c("groupID", "featureID", as.character(colData(dxd)$sample_id)[1:20])
row.names(dex.norm) = NULL

> dex.norm[1:10, 1:5]
              groupID          featureID ERR2675454 ERR2675455 ERR2675458
1  ENSG00000000003.15  ENST00000373020.9  362.38678  642.07058  284.41435
2  ENSG00000000003.15  ENST00000614008.4  147.89717   45.43884   36.86853
3  ENSG00000000457.14 ENST00000367771.11  150.99449  148.17013  158.00797
4  ENSG00000000457.14  ENST00000367772.8  224.55591  298.31587  143.96282
5  ENSG00000000460.17  ENST00000359326.9  103.76032   61.24366   90.41567
6  ENSG00000000460.17  ENST00000496973.5   59.62347   28.64623   49.15804
7  ENSG00000000460.17  ENST00000459772.5    0.00000    0.00000    0.00000
8  ENSG00000000460.17  ENST00000413811.3   83.62772   94.82889   84.27092
9  ENSG00000000971.16  ENST00000367429.9  354.64348  138.29212  315.13813
10 ENSG00000000971.16  ENST00000630130.2   50.33150   59.26805   61.44755

# Per-group normalised mean
dex.mean = as.data.frame(sapply( levels(samps$group), 
function(lvl) rowMeans(dex.norm[, 3:ncol(dex.norm)][, samps$group == lvl, drop = FALSE]) ))

# log2 fold change in expression
dex.log2fc = log2(dex.mean[2]/dex.mean[1])
colnames(dex.log2fc) = "log2fc"
rownames(dex.log2fc) = dex.norm$featureID

# Merge to create result data
dexData = cbind(dex.norm[,1:2], dex.mean, dex.norm[, 3:ncol(dex.norm)])
dexData = merge(annoData, dexData, by.x = c("GeneID","TranscriptID"), by.y = c("groupID","featureID"))
dexData = dexData[order(dexData$GeneID, dexData$TranscriptID),]

> head(dex.mean)
     normal     0-IIa
1 577.08171 371.83234
2 100.65825  86.10738
3 162.36012 135.50030
4 197.67378 183.85082
5  29.25046  48.65944
6  20.04049  34.34935

> head(dex.log2fc)
                       log2fc
ENST00000373020.9  -0.6341234
ENST00000614008.4  -0.2252566
ENST00000367771.11 -0.2609013
ENST00000367772.8  -0.1045860
ENST00000359326.9   0.7342603
ENST00000496973.5   0.7773652

> dexData[1:10,1:16]
               GeneID       TranscriptID GeneSymbol Chromosome     Start       End TranscriptName                   Class Strand Length    normal     0-IIa ERR2675454 ERR2675455 ERR2675458 ERR2675459
1  ENSG00000000003.15  ENST00000373020.9     TSPAN6       chrX 100627108 100636806     TSPAN6-201          protein_coding      -   9698 577.08171 371.83234  362.38678  642.07058  284.41435 663.254670
2  ENSG00000000003.15  ENST00000614008.4     TSPAN6       chrX 100632063 100637104     TSPAN6-205          protein_coding      -   5041 100.65825  86.10738  147.89717   45.43884   36.86853  48.452046
3  ENSG00000000457.14 ENST00000367771.11      SCYL3       chr1 169849631 169893896      SCYL3-202          protein_coding      -  44265 162.36012 135.50030  150.99449  148.17013  158.00797 206.728728
4  ENSG00000000457.14  ENST00000367772.8      SCYL3       chr1 169853074 169893959      SCYL3-203          protein_coding      -  40885 197.67378 183.85082  224.55591  298.31587  143.96282 258.410910
5  ENSG00000000460.17  ENST00000359326.9   C1orf112       chr1 169795040 169854080   C1orf112-202          protein_coding      +  59040  29.25046  48.65944  103.76032   61.24366   90.41567   0.000000
6  ENSG00000000460.17  ENST00000413811.3   C1orf112       chr1 169795921 169853037   C1orf112-203          protein_coding      +  57116  61.65879  80.05450   83.62772   94.82889   84.27092  37.684924
7  ENSG00000000460.17  ENST00000459772.5   C1orf112       chr1 169795079 169854080   C1orf112-204 nonsense_mediated_decay      +  59001  42.78032  17.35794    0.00000    0.00000    0.00000  57.065743
8  ENSG00000000460.17  ENST00000496973.5   C1orf112       chr1 169795043 169804347   C1orf112-208          protein_coding      +   9304  20.04049  34.34935   59.62347   28.64623   49.15804   6.460273
9  ENSG00000000971.16  ENST00000367429.9        CFH       chr1 196652043 196747504        CFH-202          protein_coding      +  95461 203.52867 238.50167  354.64348  138.29212  315.13813  87.213682
10 ENSG00000000971.16  ENST00000630130.2        CFH       chr1 196652045 196701566        CFH-206          protein_coding      +  49521  64.09340  59.75558   50.33150   59.26805   61.44755  62.449303

# Merge to create result data
dexDTU = merge(dex.padj[,c("featureID.1","groupID.1","gene","transcript")], dex.log2fc, by.x = "featureID.1", by.y = "row.names")
dexDTU = merge(annoData, dexDTU, by.x = c("GeneID","TranscriptID"), by.y = c("groupID.1","featureID.1"))
dexDTU = dexDTU[order(dexDTU$GeneID, dexDTU$TranscriptID),]

> head(dexDTU)
              GeneID       TranscriptID GeneSymbol Chromosome    Start      End TranscriptName           Class Strand Length        gene  transcript     log2fc
1 ENSG00000010295.19  ENST00000336604.8      IFFO1      chr12  6539528  6556071      IFFO1-201  protein_coding      -  16543 0.005950696 0.002316827  3.2041293
2 ENSG00000010295.19  ENST00000356896.8      IFFO1      chr12  6539539  6556073      IFFO1-202  protein_coding      -  16534 0.005950696 0.019401970 -2.6415400
3 ENSG00000010295.19  ENST00000396830.2      IFFO1      chr12  6539607  6549113      IFFO1-203 retained_intron      -   9506 0.005950696 1.000000000 -0.5706681
4 ENSG00000010295.19  ENST00000488007.5      IFFO1      chr12  6539577  6556071      IFFO1-210 retained_intron      -  16494 0.005950696 1.000000000 -0.9167712
5 ENSG00000010803.16 ENST00000326197.11      SCMH1       chr1 41027202 41160898      SCMH1-201  protein_coding      - 133696 0.002025851 1.000000000 -0.5996156
6 ENSG00000010803.16  ENST00000337495.9      SCMH1       chr1 41027202 41242110      SCMH1-202  protein_coding      - 214908 0.002025851 1.000000000 -0.9283314

Write results to files

write.table(dexData, file="DTU_DEXSeq-stageR_means_and_counts.txt", sep = "\t", quote = F, row.names = F, col.names = T)
write.table(dexDTU, file="DTU_DEXSeq-stageR_results.txt", sep = "\t", quote = F, row.names = F, col.names = T)

Exploring results

We will use the plotExpression to plot the transcript expression of a given gene. In the example below, we select a gene from the data.frame dex.padj that has smallest transcript and gene adjusted p-values, and with data from dex.norm

# We will use this function in both DRIMSeq & DEXSeq workflows
plotExpression <- function(expData = NULL, geneID = NULL, samps = NULL, isProportion = FALSE) {
        colnames(expData)[1:2] = c("gid","tid")
        sub = subset(expData, gid == geneID)
        sub = reshape2::melt(sub, id = c("gid", "tid"))
        sub = merge(samps, sub, by.x = "sample_id", by.y = "variable")
        if(!isProportion) {
                sub$value = log(sub$value)
        }

        clrs = c("dodgerblue3", "maroon2",  "forestgreen", "darkorange1", "blueviolet", "firebrick2",
"deepskyblue", "orchid2", "chartreuse3", "gold", "slateblue1", "tomato" , "blue", "magenta", "green3",
"yellow", "purple3", "red" ,"darkslategray1", "lightpink1", "lightgreen", "khaki1", "plum3", "salmon")

        p = ggplot(sub, aes(tid, value, color = group, fill = group)) +
        geom_boxplot(alpha = 0.4, outlier.shape = NA, width = 0.8, lwd = 0.5) +
        stat_summary(fun = mean, geom = "point", color = "black", shape = 5, size = 3, position=position_dodge(width = 0.8)) +
        scale_color_manual(values = clrs) + scale_fill_manual(values = clrs) +
        geom_quasirandom(color = "black", size = 1, dodge.width = 0.8) + theme_bw() +
        ggtitle(geneID) + xlab("Transcripts")

        if(!isProportion) {
                p = p + ylab("log(Expression)")
        } else {
                p = p + ylab("Proportions")
        }
        p
}

# Plot the normalised counts for one of the significant genes, where we can see evidence of switching
gene_id = unique(dex.padj[order(dex.padj$transcript, dex.padj$gene),]$groupID.1)[1]

png("plotExpression.DEXSeq-stageR.png", width=6, height=6, units = "in", res = 300)
plotExpression(dex.norm, gene_id, samps, isProportion = FALSE)
dev.off()

Session info

> sessionInfo()
R version 3.6.3 (2020-02-29)
Platform: x86_64-conda_cos6-linux-gnu (64-bit)
Running under: Ubuntu 18.04.4 LTS

Matrix products: default
BLAS/LAPACK: /home/USER/miniconda3/lib/libopenblasp-r0.3.9.so

locale:
 [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
 [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
 [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats4    parallel  stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] ggbeeswarm_0.6.0            ggplot2_3.3.0              
 [3] reshape2_1.4.4              stageR_1.8.0               
 [5] DEXSeq_1.32.0               RColorBrewer_1.1-2         
 [7] DESeq2_1.26.0               SummarizedExperiment_1.16.1
 [9] DelayedArray_0.12.3         matrixStats_0.56.0         
[11] BiocParallel_1.20.1         tximport_1.14.2            
[13] GenomicFeatures_1.38.2      AnnotationDbi_1.48.0       
[15] Biobase_2.46.0              GenomicRanges_1.38.0       
[17] GenomeInfoDb_1.22.1         IRanges_2.20.2             
[19] S4Vectors_0.24.4            BiocGenerics_0.32.0        
[21] data.table_1.12.8          

loaded via a namespace (and not attached):
 [1] colorspace_1.4-1         hwriter_1.3.2            ellipsis_0.3.0          
 [4] htmlTable_1.13.3         XVector_0.26.0           base64enc_0.1-3         
 [7] rstudioapi_0.11          farver_2.0.3             bit64_0.9-7             
[10] splines_3.6.3            geneplotter_1.64.0       knitr_1.28              
[13] Formula_1.2-3            jsonlite_1.6.1           Rsamtools_2.2.3         
[16] annotate_1.64.0          cluster_2.1.0            dbplyr_1.4.3            
[19] png_0.1-7                readr_1.3.1              compiler_3.6.3          
[22] httr_1.4.1               backports_1.1.6          assertthat_0.2.1        
[25] Matrix_1.2-18            limma_3.42.2             acepack_1.4.1           
[28] htmltools_0.4.0          prettyunits_1.1.1        tools_3.6.3             
[31] gtable_0.3.0             glue_1.4.0               GenomeInfoDbData_1.2.2  
[34] dplyr_0.8.5              rappdirs_0.3.1           Rcpp_1.0.4.6            
[37] DRIMSeq_1.14.0           vctrs_0.2.4              Biostrings_2.54.0       
[40] rtracklayer_1.46.0       xfun_0.13                stringr_1.4.0           
[43] lifecycle_0.2.0          statmod_1.4.34           XML_3.99-0.3            
[46] edgeR_3.28.1             zlibbioc_1.32.0          scales_1.1.0            
[49] hms_0.5.3                curl_4.3                 memoise_1.1.0           
[52] gridExtra_2.3            biomaRt_2.42.1           rpart_4.1-15            
[55] latticeExtra_0.6-29      stringi_1.4.6            RSQLite_2.2.0           
[58] genefilter_1.68.0        checkmate_2.0.0          rlang_0.4.6             
[61] pkgconfig_2.0.3          bitops_1.0-6             lattice_0.20-41         
[64] purrr_0.3.4              GenomicAlignments_1.22.1 htmlwidgets_1.5.1       
[67] labeling_0.3             bit_1.1-15.2             tidyselect_1.0.0        
[70] plyr_1.8.6               magrittr_1.5             R6_2.4.1                
[73] Hmisc_4.4-0              DBI_1.1.0                pillar_1.4.4            
[76] foreign_0.8-76           withr_2.2.0              survival_3.1-12         
[79] RCurl_1.98-1.2           nnet_7.3-14              tibble_3.0.1            
[82] crayon_1.3.4             BiocFileCache_1.10.2     jpeg_0.1-8.1            
[85] progress_1.2.2           locfit_1.5-9.4           grid_3.6.3              
[88] blob_1.2.1               digest_0.6.25            xtable_1.8-4            
[91] openssl_1.4.1            munsell_0.5.0            beeswarm_0.2.3          
[94] vipor_0.4.5              askpass_1.1

PreviousDTU analysis using DRIMSeq

Last updated 5 years ago

Was this helpful?