DNA Methylation at Various Genic Structure Regions

Again, we use intersectBed and groupBy commands to calculate the average methylation values at each designated region.

Intergenic Regions

# Console output

# intersectBed 
chr1    0       1736    chr1    468     469     0
chr1    0       1736    chr1    470     471     0.666667
chr1    0       1736    chr1    483     484     0.5
chr1    0       1736    chr1    488     489     1
chr1    0       1736    chr1    492     493     0.857143

# groupBy
chr1    0       1736    97      0.19548
chr1    20972   24416   27      0
chr1    25944   42911   148     0.10567
chr1    44799   52810   39      0.56999
chr1    53750   58917   21      0.22781
cd ~/

# Submit one LSF job per methylation dataset (h1, imr90) to queue "16G";
# job stdout/stderr go to the files "stdout" and "stderr". Each job:
#   1. intersectBed -wa -wb: pairs every intergenic region (-a) with each
#      overlapping bedGraph record (-b), giving 7 columns per line
#      (region chr/start/end, then bedGraph chr/start/end/value).
#   2. groupBy -g 1-3 -c 7,7 -o count,mean: per region, counts the overlapping
#      records and averages column 7 (the methylation value).
#   3. awk: re-emits the tab-separated columns with the mean fixed to
#      4 decimal places.
# The awk separator is written as '\t' (awk expands the escape itself) instead
# of $'\t', so the job does not depend on the execution host's shell
# supporting ANSI-C quoting. \$ and \" are escaped so that they survive the
# outer double quotes and reach awk unchanged.
for cell in h1 imr90; do
  bsub -q 16G -o stdout -e stderr "intersectBed -a Data/gencode.v3c.intergenic.bed.gz -b /work3/NRPB1219/hg18_${cell}_meth.bedGraph -wa -wb | groupBy -i - -g 1-3 -c 7,7 -o count,mean | awk -F '\t' 'BEGIN { OFS=FS } { print \$1,\$2,\$3,\$4,sprintf(\"%.4f\",\$5) }' > Output/gencode.v3c.intergenic.${cell}.meth"
done

Merged Promoter Regions

# Console output

# intersectBed
chr1    736     2372    chr1    747     748     0
chr1    736     2372    chr1    749     750     0
chr1    736     2372    chr1    765     766     0
chr1    736     2372    chr1    770     771     0
chr1    736     2372    chr1    776     777     0

# groupBy
chr1    736     2372    71      0
chr1    18416   20728   136     0
chr1    25436   26944   22      0
chr1    41911   43411   8       0.1875
chr1    51810   53310   14      0.65505
cd ~/

# Submit one LSF job per methylation dataset (h1, imr90) to queue "16G";
# job stdout/stderr go to the files "stdout" and "stderr". Each job:
#   1. intersectBed -wa -wb: pairs every merged promoter region (-a) with each
#      overlapping bedGraph record (-b), giving 7 columns per line
#      (region chr/start/end, then bedGraph chr/start/end/value).
#   2. groupBy -g 1-3 -c 7,7 -o count,mean: per region, counts the overlapping
#      records and averages column 7 (the methylation value).
#   3. awk: re-emits the tab-separated columns with the mean fixed to
#      4 decimal places.
# The awk separator is written as '\t' (awk expands the escape itself) instead
# of $'\t', so the job does not depend on the execution host's shell
# supporting ANSI-C quoting. \$ and \" are escaped so that they survive the
# outer double quotes and reach awk unchanged.
for cell in h1 imr90; do
  bsub -q 16G -o stdout -e stderr "intersectBed -a Data/gencode.v3c.promoter_merged.bed.gz -b /work3/NRPB1219/hg18_${cell}_meth.bedGraph -wa -wb | groupBy -i - -g 1-3 -c 7,7 -o count,mean | awk -F '\t' 'BEGIN { OFS=FS } { print \$1,\$2,\$3,\$4,sprintf(\"%.4f\",\$5) }' > Output/gencode.v3c.promoter_merged.${cell}.meth"
done

Merged Exonic Regions

# Console output

# intersectBed
chr1    1736    2090    chr1    1741    1742    0
chr1    1736    2090    chr1    1808    1809    0
chr1    1736    2090    chr1    1822    1823    0
chr1    2475    2584    chr1    2534    2535    0
chr1    2475    2584    chr1    2537    2538    0

# groupBy
chr1    1736    2090    3       0
chr1    2475    2584    3       0
chr1    3083    4692    32      0.0625
chr1    4832    4901    3       0
chr1    5658    5810    5       0
cd ~/

# Submit one LSF job per methylation dataset (h1, imr90) to queue "16G";
# job stdout/stderr go to the files "stdout" and "stderr". Each job:
#   1. intersectBed -wa -wb: pairs every merged exonic region (-a) with each
#      overlapping bedGraph record (-b), giving 7 columns per line
#      (region chr/start/end, then bedGraph chr/start/end/value).
#   2. groupBy -g 1-3 -c 7,7 -o count,mean: per region, counts the overlapping
#      records and averages column 7 (the methylation value).
#   3. awk: re-emits the tab-separated columns with the mean fixed to
#      4 decimal places.
# The awk separator is written as '\t' (awk expands the escape itself) instead
# of $'\t', so the job does not depend on the execution host's shell
# supporting ANSI-C quoting. \$ and \" are escaped so that they survive the
# outer double quotes and reach awk unchanged.
for cell in h1 imr90; do
  bsub -q 16G -o stdout -e stderr "intersectBed -a Data/gencode.v3c.exon_merged.bed.gz -b /work3/NRPB1219/hg18_${cell}_meth.bedGraph -wa -wb | groupBy -i - -g 1-3 -c 7,7 -o count,mean | awk -F '\t' 'BEGIN { OFS=FS } { print \$1,\$2,\$3,\$4,sprintf(\"%.4f\",\$5) }' > Output/gencode.v3c.exon_merged.${cell}.meth"
done

Intronic Regions

# Console output

# intersectBed
chr1    2090    2475    chr1    2127    2128    0
chr1    2090    2475    chr1    2133    2134    0
chr1    2090    2475    chr1    2140    2141    0
chr1    2090    2475    chr1    2167    2168    0
chr1    2090    2475    chr1    2180    2181    0

# groupBy
chr1    2090    2475    12      0
chr1    2584    2837    7       0
chr1    2915    3083    2       0.33333
chr1    4692    4832    3       0.41667
chr1    4901    5658    19      0
cd ~/

# Submit one LSF job per methylation dataset (h1, imr90) to queue "16G";
# job stdout/stderr go to the files "stdout" and "stderr". Each job:
#   1. intersectBed -wa -wb: pairs every intronic region (-a) with each
#      overlapping bedGraph record (-b), giving 7 columns per line
#      (region chr/start/end, then bedGraph chr/start/end/value).
#   2. groupBy -g 1-3 -c 7,7 -o count,mean: per region, counts the overlapping
#      records and averages column 7 (the methylation value).
#   3. awk: re-emits the tab-separated columns with the mean fixed to
#      4 decimal places.
# The awk separator is written as '\t' (awk expands the escape itself) instead
# of $'\t', so the job does not depend on the execution host's shell
# supporting ANSI-C quoting. \$ and \" are escaped so that they survive the
# outer double quotes and reach awk unchanged.
for cell in h1 imr90; do
  bsub -q 16G -o stdout -e stderr "intersectBed -a Data/gencode.v3c.intron_merged.bed.gz -b /work3/NRPB1219/hg18_${cell}_meth.bedGraph -wa -wb | groupBy -i - -g 1-3 -c 7,7 -o count,mean | awk -F '\t' 'BEGIN { OFS=FS } { print \$1,\$2,\$3,\$4,sprintf(\"%.4f\",\$5) }' > Output/gencode.v3c.intron_merged.${cell}.meth"
done

UTR Regions

# Console output

# intersectBed
chr1    1737    2090    5_UTR   ENST00000456328 +       chr1    1741    1742    0
chr1    1737    2090    5_UTR   ENST00000456328 +       chr1    1808    1809    0
chr1    1737    2090    5_UTR   ENST00000456328 +       chr1    1822    1823    0
chr1    2476    2584    5_UTR   ENST00000456328 +       chr1    2534    2535    0
chr1    2476    2584    5_UTR   ENST00000456328 +       chr1    2537    2538    0

# groupBy
chr1    1737    2090    5_UTR   ENST00000456328 +       3       0
chr1    2476    2584    5_UTR   ENST00000456328 +       3       0
chr1    3084    4021    3_UTR   ENST00000456328 +       14      0.14286
chr1    4226    4561    3_UTR   ENST00000438504 -       7       0
chr1    4226    4692    3_UTR   ENST00000423562 -       15      0
cd ~/

# Submit one LSF job per methylation dataset (h1, imr90) to queue "16G";
# job stdout/stderr go to the files "stdout" and "stderr". Each job:
#   1. intersectBed -wa -wb: pairs every UTR record (-a; 6 columns:
#      chr/start/end, UTR type, transcript ID, strand) with each overlapping
#      bedGraph record (-b), giving 10 columns per line.
#   2. groupBy -g 1-6 -c 10,10 -o count,mean: per UTR record, counts the
#      overlapping bedGraph records and averages column 10 (the methylation
#      value).
#   3. awk: re-emits the tab-separated columns with the mean fixed to
#      4 decimal places.
# The awk separator is written as '\t' (awk expands the escape itself) instead
# of $'\t', so the job does not depend on the execution host's shell
# supporting ANSI-C quoting. \$ and \" are escaped so that they survive the
# outer double quotes and reach awk unchanged.
for cell in h1 imr90; do
  bsub -q 16G -o stdout -e stderr "intersectBed -a Data/gencode.v3c.UTR.bed.gz -b /work3/NRPB1219/hg18_${cell}_meth.bedGraph -wa -wb | groupBy -i - -g 1-6 -c 10,10 -o count,mean | awk -F '\t' 'BEGIN { OFS=FS } { print \$1,\$2,\$3,\$4,\$5,\$6,\$7,sprintf(\"%.4f\",\$8) }' > Output/gencode.v3c.UTR.${cell}.meth"
done

Check output files

Use bjobs to check that all jobs have completed, and ls to check that the output files are in the "Output" folder.

# List (long format, including dotfiles) the gencode.v3c.* result files in ~/Output.
ls -la ~/Output/gencode.v3c.*

# Console output

-rw------- 1 s00yao00 s00yao00 7640285 2014-12-20 18:23 /home/s00yao00/Output/gencode.v3c.exon_merged.h1.meth
-rw------- 1 s00yao00 s00yao00 7640285 2014-12-20 18:23 /home/s00yao00/Output/gencode.v3c.exon_merged.imr90.meth
-rw------- 1 s00yao00 s00yao00 1048190 2014-12-20 18:25 /home/s00yao00/Output/gencode.v3c.intergenic.h1.meth
-rw------- 1 s00yao00 s00yao00 1048190 2014-12-20 18:25 /home/s00yao00/Output/gencode.v3c.intergenic.imr90.meth
-rw------- 1 s00yao00 s00yao00 7621155 2014-12-20 18:25 /home/s00yao00/Output/gencode.v3c.intron_merged.h1.meth
-rw------- 1 s00yao00 s00yao00 7621155 2014-12-20 18:25 /home/s00yao00/Output/gencode.v3c.intron_merged.imr90.meth
-rw------- 1 s00yao00 s00yao00 2100589 2014-12-20 18:23 /home/s00yao00/Output/gencode.v3c.promoter_merged.h1.meth
-rw------- 1 s00yao00 s00yao00 2100589 2014-12-20 18:23 /home/s00yao00/Output/gencode.v3c.promoter_merged.imr90.meth
-rw------- 1 s00yao00 s00yao00 9696115 2014-12-20 18:36 /home/s00yao00/Output/gencode.v3c.UTR.h1.meth
-rw------- 1 s00yao00 s00yao00 9696115 2014-12-20 18:24 /home/s00yao00/Output/gencode.v3c.UTR.imr90.meth

Last updated