This is a lightweight wrapper to the import family of functions defined in rtracklayer.

Read common interval based formats as GRanges.

read_bed(file, col_names = NULL, genome_info = NULL, overlap_ranges = NULL)

read_bed_graph(
  file,
  col_names = NULL,
  genome_info = NULL,
  overlap_ranges = NULL
)

read_narrowpeaks(
  file,
  col_names = NULL,
  genome_info = NULL,
  overlap_ranges = NULL
)

Arguments

file

A path to a file or a connection.

col_names

An optional character vector for including additional columns in file that are not part of the BED/narrowPeaks specification.

genome_info

An optional character string or a Ranges object that contains information about the genome build. For example the USSC identifier "hg19" will add build information to the returned GRanges.

overlap_ranges

An optional Ranges object. Only the intervals in the file that overlap the Ranges will be returned.

Value

A GRanges object

Details

This is a lightweight wrapper to the import family of functions defined in rtracklayer. The read_narrowpeaks function parses the ENCODE narrowPeak BED format (see https://genome.ucsc.edu/FAQ/FAQformat.html#format12 for details.). As such the parser expects four additional columns called (corresponding to the narrowPeaks spec):

  • signalValue

  • pValue

  • qValue

  • peak

See also

rtracklayer::BEDFile()

Examples

test_path <- system.file("tests", package = "rtracklayer")
bed_file <- file.path(test_path, "test.bed")
gr <- read_bed(bed_file)
gr
#> GRanges object with 5 ranges and 5 metadata columns:
#>       seqnames              ranges strand |        name     score     itemRgb
#>          <Rle>           <IRanges>  <Rle> | <character> <numeric> <character>
#>   [1]     chr7 127471197-127472363      + |        Pos1         0     #FF0000
#>   [2]     chr7 127472364-127473530      + |        Pos2         2     #FF0000
#>   [3]     chr7 127473531-127474697      - |        Neg1         0     #FF0000
#>   [4]     chr9 127474698-127475864      + |        Pos3         5     #FF0000
#>   [5]     chr9 127475865-127477031      - |        Neg2         5     #0000FF
#>                     thick                  blocks
#>                 <IRanges>           <IRangesList>
#>   [1] 127471197-127472363 1-300,501-700,1068-1167
#>   [2] 127472364-127473530          1-250,668-1167
#>   [3] 127473531-127474697                  1-1167
#>   [4] 127474698-127475864                  1-1167
#>   [5] 127475865-127477031                  1-1167
#>   -------
#>   seqinfo: 2 sequences from an unspecified genome; no seqlengths
gr <- read_bed(bed_file, genome_info = "hg19")
#> 
#> Attaching package: ‘Biostrings’
#> The following object is masked from ‘package:base’:
#> 
#>     strsplit
#> 
#> Attaching package: ‘rtracklayer’
#> The following object is masked from ‘package:BiocIO’:
#> 
#>     FileForFormat
gr
#> GRanges object with 5 ranges and 5 metadata columns:
#>       seqnames              ranges strand |        name     score     itemRgb
#>          <Rle>           <IRanges>  <Rle> | <character> <numeric> <character>
#>   [1]     chr7 127471197-127472363      + |        Pos1         0     #FF0000
#>   [2]     chr7 127472364-127473530      + |        Pos2         2     #FF0000
#>   [3]     chr7 127473531-127474697      - |        Neg1         0     #FF0000
#>   [4]     chr9 127474698-127475864      + |        Pos3         5     #FF0000
#>   [5]     chr9 127475865-127477031      - |        Neg2         5     #0000FF
#>                     thick                  blocks
#>                 <IRanges>           <IRangesList>
#>   [1] 127471197-127472363 1-300,501-700,1068-1167
#>   [2] 127472364-127473530          1-250,668-1167
#>   [3] 127473531-127474697                  1-1167
#>   [4] 127474698-127475864                  1-1167
#>   [5] 127475865-127477031                  1-1167
#>   -------
#>   seqinfo: 298 sequences (2 circular) from hg19 genome
olap <-  as_granges(data.frame(seqnames = "chr7", start = 1, end = 127473000))
gr <- read_bed(bed_file,
              overlap_ranges = olap)
# bedGraph
bg_file <- file.path(test_path, "test.bedGraph")
gr <- read_bed_graph(bg_file)
gr
#> GRanges object with 9 ranges and 1 metadata column:
#>       seqnames            ranges strand |     score
#>          <Rle>         <IRanges>  <Rle> | <numeric>
#>   [1]    chr19 59102001-59102300      * |     -1.00
#>   [2]    chr19 59102301-59102600      * |     -0.75
#>   [3]    chr19 59102601-59102900      * |     -0.50
#>   [4]    chr19 59102901-59103200      * |     -0.25
#>   [5]    chr19 59103201-59103500      * |      0.00
#>   [6]    chr19 59103501-59103800      * |      0.25
#>   [7]    chr17 59103801-59104100      * |      0.50
#>   [8]    chr18 59104101-59104400      * |      0.75
#>   [9]    chr18 59104401-59104700      * |      1.00
#>   -------
#>   seqinfo: 3 sequences from an unspecified genome; no seqlengths
# narrowpeaks
np_file <- system.file("extdata", "demo.narrowPeak.gz",  package="rtracklayer")
gr <- read_narrowpeaks(np_file, genome_info = "hg19")
gr
#> GRanges object with 6 ranges and 6 metadata columns:
#>       seqnames              ranges strand |        name     score signalValue
#>          <Rle>           <IRanges>  <Rle> | <character> <numeric>   <numeric>
#>   [1]    chr19       893203-893659      * |        <NA>         0      16.705
#>   [2]    chr19   45981393-45982404      * |        <NA>         0       7.853
#>   [3]     chr9 137029517-137029957      * |        <NA>         0      19.135
#>   [4]     chr7 148680558-148681129      * |        <NA>         0      10.418
#>   [5]     chr7 148638202-148638769      * |        <NA>         0       8.161
#>   [6]     chr6   52860024-52860589      * |        <NA>         0       9.332
#>          pValue      qValue      peak
#>       <numeric>   <numeric> <integer>
#>   [1]    85.879 3.31125e-82       240
#>   [2]    83.114 1.44562e-79       587
#>   [3]    78.886 1.95499e-75       208
#>   [4]    70.532 3.68097e-67       263
#>   [5]    60.506 2.60538e-57       341
#>   [6]    54.023 7.13040e-51       349
#>   -------
#>   seqinfo: 298 sequences (2 circular) from hg19 genome